#!/usr/bin/env python3
#
# Aggregate and report call-stack propagated block-device operations
# from trace output.
#
# Example:
# ./scripts/bench.py -ttrace
# ./scripts/perfbd.py trace -j -Flfs.c -Flfs_util.c -Serased -Sproged -Sreaded
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
#

import bisect
import collections as co
import csv
import functools as ft
import itertools as it
import math as m
import multiprocessing as mp
import os
import re
import shlex
import subprocess as sp


OBJDUMP_TOOL = ['objdump']
THRESHOLD = (0.5, 0.85)
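
# note THRESHOLD is a pair used by annotate below: lines at or above the
# first fraction of the max read/prog/erase count get shown/highlighted,
# lines at or above the second get highlighted more strongly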

# integer fields
class Int(co.namedtuple('Int', 'x')):
    __slots__ = ()
    def __new__(cls, x=0):
        if isinstance(x, Int):
            return x
        if isinstance(x, str):
            try:
                x = int(x, 0)
            except ValueError:
                # also accept +-∞ and +-inf
                if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = m.inf
                elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
                    x = -m.inf
                else:
                    raise
        assert isinstance(x, int) or m.isinf(x), x
        return super().__new__(cls, x)

    def __str__(self):
        if self.x == m.inf:
            return '∞'
        elif self.x == -m.inf:
            return '-∞'
        else:
            return str(self.x)

    def __int__(self):
        assert not m.isinf(self.x)
        return self.x

    def __float__(self):
        return float(self.x)

    none = '%7s' % '-'
    def table(self):
        return '%7s' % (self,)

    diff_none = '%7s' % '-'
    diff_table = table

    def diff_diff(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        diff = new - old
        if diff == +m.inf:
            return '%7s' % '+∞'
        elif diff == -m.inf:
            return '%7s' % '-∞'
        else:
            return '%+7d' % diff

    def ratio(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        if m.isinf(new) and m.isinf(old):
            return 0.0
        elif m.isinf(new):
            return +m.inf
        elif m.isinf(old):
            return -m.inf
        elif not old and not new:
            return 0.0
        elif not old:
            return 1.0
        else:
            return (new-old) / old

    def __add__(self, other):
        return self.__class__(self.x + other.x)

    def __sub__(self, other):
        return self.__class__(self.x - other.x)

    def __mul__(self, other):
        return self.__class__(self.x * other.x)
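
# for reference, a few Int behaviors the table rendering below relies on:
#   Int('0x10') + Int(2)       -> Int(18)
#   str(Int('inf'))            -> '∞'
#   Int.ratio(Int(3), Int(2))  -> +0.5, rendered as +50.0%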

# perf results
class PerfBdResult(co.namedtuple('PerfBdResult', [
        'file', 'function', 'line',
        'readed', 'proged', 'erased',
        'children'])):
    _by = ['file', 'function', 'line']
    _fields = ['readed', 'proged', 'erased']
    _types = {'readed': Int, 'proged': Int, 'erased': Int}

    __slots__ = ()
    def __new__(cls, file='', function='', line=0,
            readed=0, proged=0, erased=0,
            children=[]):
        return super().__new__(cls, file, function, int(Int(line)),
            Int(readed), Int(proged), Int(erased),
            children)

    def __add__(self, other):
        return PerfBdResult(self.file, self.function, self.line,
            self.readed + other.readed,
            self.proged + other.proged,
            self.erased + other.erased,
            self.children + other.children)
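
# open a file, accepting '-' for stdin/stdout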
def openio(path, mode='r', buffering=-1):
    if path == '-':
        if mode == 'r':
            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
        else:
            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
    else:
        return open(path, mode, buffering)
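
# run objdump to find symbol addresses and dwarf line info in obj_path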
def collect_syms_and_lines(obj_path, *,
        objdump_tool=None,
        **args):
    symbol_pattern = re.compile(
        '^(?P<addr>[0-9a-fA-F]+)'
            '\s+.*'
            '\s+(?P<size>[0-9a-fA-F]+)'
            '\s+(?P<name>[^\s]+)\s*$')
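    # a symbol table line from objdump -t looks roughly like this, though
    # the exact columns vary with binutils version and file format:
    #   00000000000123a0 g     F .text  00000000000001d5 lfs_bd_read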
    line_pattern = re.compile(
        '^\s+(?:'
            # matches dir/file table
            '(?P<no>[0-9]+)'
                '(?:\s+(?P<dir>[0-9]+))?'
                '\s+.*'
                '\s+(?P<path>[^\s]+)'
            # matches line opcodes
            '|' '\[[^\]]*\]\s+'
                '(?:'
                    '(?P<op_special>Special)'
                    '|' '(?P<op_copy>Copy)'
                    '|' '(?P<op_end>End of Sequence)'
                    '|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
                    '|' 'Line .*?to (?P<op_line>[0-9]+)'
                    '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
                    '|' '.' ')*'
            ')$', re.IGNORECASE)
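    # --dwarf=rawline dumps the dir/file tables and the raw line-number
    # program, with opcode lines roughly like (again, version dependent):
    #   [0x0000005a]  Special opcode 61: advance Address by 4 to 0x10c and Line by 1 to 12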

    # figure out symbol addresses
    syms = {}
    sym_at = []
    cmd = objdump_tool + ['-t', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        m = symbol_pattern.match(line)
        if m:
            name = m.group('name')
            addr = int(m.group('addr'), 16)
            size = int(m.group('size'), 16)
            # ignore zero-sized symbols
            if not size:
                continue
            # note multiple symbols can share a name
            if name not in syms:
                syms[name] = set()
            syms[name].add((addr, size))
            sym_at.append((addr, name, size))
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep largest/first when duplicates
    sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
    sym_at_ = []
    for addr, name, size in sym_at:
        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
            sym_at_.append((addr, name, size))
    sym_at = sym_at_

    # state machine for dwarf line numbers, note that objdump's
    # decodedline seems to have issues with multiple dir/file
    # tables, which is why we need this
    lines = []
    line_at = []
    dirs = {}
    files = {}
    op_file = 1
    op_line = 1
    op_addr = 0
    cmd = objdump_tool + ['--dwarf=rawline', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        m = line_pattern.match(line)
        if m:
            if m.group('no') and not m.group('dir'):
                # found a directory entry
                dirs[int(m.group('no'))] = m.group('path')
            elif m.group('no'):
                # found a file entry
                dir = int(m.group('dir'))
                if dir in dirs:
                    files[int(m.group('no'))] = os.path.join(
                        dirs[dir],
                        m.group('path'))
                else:
                    files[int(m.group('no'))] = m.group('path')
            else:
                # found a state machine update
                if m.group('op_file'):
                    op_file = int(m.group('op_file'), 0)
                if m.group('op_line'):
                    op_line = int(m.group('op_line'), 0)
                if m.group('op_addr'):
                    op_addr = int(m.group('op_addr'), 0)

                if (m.group('op_special')
                        or m.group('op_copy')
                        or m.group('op_end')):
                    file = os.path.abspath(files.get(op_file, '?'))
                    lines.append((file, op_line, op_addr))
                    line_at.append((op_addr, file, op_line))

                if m.group('op_end'):
                    op_file = 1
                    op_line = 1
                    op_addr = 0
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep first when duplicates
    lines.sort()
    lines_ = []
    for file, line, addr in lines:
        if len(lines_) == 0 or lines_[-1][0] != file or lines_[-1][1] != line:
            lines_.append((file, line, addr))
    lines = lines_

    # sort and keep first when duplicates
    line_at.sort()
    line_at_ = []
    for addr, file, line in line_at:
        if len(line_at_) == 0 or line_at_[-1][0] != addr:
            line_at_.append((addr, file, line))
    line_at = line_at_

    return syms, sym_at, lines, line_at
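
# parse a slice of a trace file, accumulating read/prog/erase amounts and
# attributing them to call stacks via the symbol/line info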
def collect_job(obj_path, path, start, stop, syms, sym_at, lines, line_at, *,
        sources=None,
        everything=False,
        propagate=0,
        depth=1,
        **args):
    trace_pattern = re.compile(
        '^(?P<file>[^:]*):(?P<line>[0-9]+):trace:\s*(?P<prefix>[^\s]*?bd_)(?:'
            '(?P<read>read)\('
                '\s*(?P<read_ctx>\w+)' '\s*,'
                '\s*(?P<read_block>\w+)' '\s*,'
                '\s*(?P<read_off>\w+)' '\s*,'
                '\s*(?P<read_buffer>\w+)' '\s*,'
                '\s*(?P<read_size>\w+)' '\s*\)'
            '|' '(?P<prog>prog)\('
                '\s*(?P<prog_ctx>\w+)' '\s*,'
                '\s*(?P<prog_block>\w+)' '\s*,'
                '\s*(?P<prog_off>\w+)' '\s*,'
                '\s*(?P<prog_buffer>\w+)' '\s*,'
                '\s*(?P<prog_size>\w+)' '\s*\)'
            '|' '(?P<erase>erase)\('
                '\s*(?P<erase_ctx>\w+)' '\s*,'
                '\s*(?P<erase_block>\w+)'
                '\s*\(\s*(?P<erase_size>\w+)\s*\)' '\s*\)' ')\s*$')
    frame_pattern = re.compile(
        '^\s+at (?P<addr>\w+)\s*$')
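
    # trace lines and their backtrace frames are expected to look roughly
    # like the following, where frame lines must be tab-indented:
    #   lfs.c:2453:trace: lfs_bd_read(0x55d44f68, 0x2, 0x1f0, 0x55d44f6a, 0x10)
    #           at 0x55d44f651234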

    # parse all of the trace files for read/prog/erase operations
    last_filtered = False
    last_file = None
    last_line = None
    last_sym = None
    last_readed = 0
    last_proged = 0
    last_erased = 0
    last_stack = []
    last_delta = None
    at_cache = {}
    results = {}

    def commit():
        # fallback to just capturing top-level measurements
        if not last_stack:
            file = last_file
            sym = last_sym
            line = last_line

            # ignore filtered sources
            if sources is not None:
                if not any(
                        os.path.abspath(file) == os.path.abspath(s)
                        for s in sources):
                    return
            else:
                # default to only cwd
                if not everything and not os.path.commonpath([
                        os.getcwd(),
                        os.path.abspath(file)]) == os.getcwd():
                    return

            # simplify path
            if os.path.commonpath([
                    os.getcwd(),
                    os.path.abspath(file)]) == os.getcwd():
                file = os.path.relpath(file)
            else:
                file = os.path.abspath(file)

            results[(file, sym, line)] = (
                last_readed,
                last_proged,
                last_erased,
                {})
        else:
            # tail-recursively propagate measurements
            for i in range(len(last_stack)):
                results_ = results
                for j in reversed(range(i+1)):
                    if i+1-j > depth:
                        break
                    # propagate
                    name = last_stack[j]
                    if name in results_:
                        r, p, e, children = results_[name]
                    else:
                        r, p, e, children = 0, 0, 0, {}
                    results_[name] = (
                        r+last_readed,
                        p+last_proged,
                        e+last_erased,
                        children)
                    # recurse
                    results_ = results_[name][-1]
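
    # note commit charges an operation's full cost to every caller on the
    # stack, recording callee chains as children up to depth levels deep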
    with openio(path) as f:
        # try to jump to middle of file? need step out of utf8-safe mode and
        # then resync up with the next newline to avoid parsing half a line
        if start is not None and start > 0:
            fd = f.fileno()
            os.lseek(fd, start, os.SEEK_SET)
            while os.read(fd, 1) not in {b'\n', b'\r', b''}:
                pass
            f = os.fdopen(fd)

        for line in f:
            # we have a lot of data, try to take a few shortcuts,
            # string search is much faster than regex so try to use
            # regex as late as possible.
            if not line.startswith('\t'):
                if last_filtered:
                    commit()
                last_filtered = False

                # done processing our slice?
                if stop is not None:
                    if os.lseek(f.fileno(), 0, os.SEEK_CUR) > stop:
                        break

                if 'trace' in line and 'bd' in line:
                    m = trace_pattern.match(line)
                    if m:
                        last_filtered = True
                        last_file = os.path.abspath(m.group('file'))
                        last_line = int(m.group('line'), 0)
                        last_sym = m.group('prefix')
                        last_readed = 0
                        last_proged = 0
                        last_erased = 0
                        last_stack = []
                        last_delta = None

                        if m.group('read'):
                            last_sym += m.group('read')
                            last_readed += int(m.group('read_size'))
                        elif m.group('prog'):
                            last_sym += m.group('prog')
                            last_proged += int(m.group('prog_size'))
                        elif m.group('erase'):
                            last_sym += m.group('erase')
                            last_erased += int(m.group('erase_size'))

            elif last_filtered:
                m = frame_pattern.match(line)
                if m:
                    addr_ = int(m.group('addr'), 0)

                    # before we can do anything with addr, we need to
                    # reverse ASLR, fortunately we know the file+line of
                    # the first stack frame, so we can use that as a point
                    # of reference
                    if last_delta is None:
                        i = bisect.bisect(lines, (last_file, last_line),
                            key=lambda x: (x[0], x[1]))
                        if i > 0:
                            last_delta = lines[i-1][2] - addr_
                        else:
                            # can't reverse ASLR, give up on backtrace
                            commit()
                            last_filtered = False
                            continue

                    addr = addr_ + last_delta

                    # cached?
                    if addr in at_cache:
                        cached = at_cache[addr]
                        if cached is None:
                            # cache says to skip
                            continue
                        file, sym, line = cached
                    else:
                        # find sym
                        i = bisect.bisect(sym_at, addr, key=lambda x: x[0])
                        # check that we're actually in the sym's size
                        if i > 0 and addr < sym_at[i-1][0] + sym_at[i-1][2]:
                            _, sym, _ = sym_at[i-1]
                        else:
                            sym = hex(addr)

                        # filter out internal/unknown functions
                        if not everything and (
                                sym.startswith('__')
                                or sym.startswith('0')
                                or sym.startswith('-')
                                or sym == '_start'):
                            at_cache[addr] = None
                            continue

                        # find file+line
                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
                        if i > 0:
                            _, file, line = line_at[i-1]
                        elif len(last_stack) == 0:
                            file, line = last_file, last_line
                        else:
                            file, line = re.sub('(\.o)?$', '.c', obj_path, 1), 0

                        # ignore filtered sources
                        if sources is not None:
                            if not any(
                                    os.path.abspath(file) == os.path.abspath(s)
                                    for s in sources):
                                at_cache[addr] = None
                                continue
                        else:
                            # default to only cwd
                            if not everything and not os.path.commonpath([
                                    os.getcwd(),
                                    os.path.abspath(file)]) == os.getcwd():
                                at_cache[addr] = None
                                continue

                        # simplify path
                        if os.path.commonpath([
                                os.getcwd(),
                                os.path.abspath(file)]) == os.getcwd():
                            file = os.path.relpath(file)
                        else:
                            file = os.path.abspath(file)

                        at_cache[addr] = file, sym, line

                    last_stack.append((file, sym, line))

                    # stop propagating?
                    if propagate and len(last_stack) >= propagate:
                        commit()
                        last_filtered = False

        if last_filtered:
            commit()

    # rearrange results into result type
    def to_results(results):
        results_ = []
        for name, (r, p, e, children) in results.items():
            results_.append(PerfBdResult(*name,
                r, p, e,
                children=to_results(children)))
        return results_

    return to_results(results)
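
# unpack a (f, args, kwargs) tuple, this lets us pass keyword arguments
# through mp.Pool.imap_unordered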
def starapply(args):
    f, args, kwargs = args
    return f(*args, **kwargs)
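
# collect results from trace files, optionally splitting the work across
# multiple processes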
def collect(obj_path, trace_paths, *,
        jobs=None,
        **args):
    # automatic job detection?
    if jobs == 0:
        jobs = len(os.sched_getaffinity(0))

    # find sym/line info to reverse ASLR
    syms, sym_at, lines, line_at = collect_syms_and_lines(obj_path, **args)

    if jobs is not None:
        # try to split up files so that even single files can be processed
        # in parallel
        #
        # this looks naive, since we're splitting up text files by bytes, but
        # we do proper backtrace delimination in collect_job
        trace_ranges = []
        for path in trace_paths:
            if path == '-':
                trace_ranges.append([(None, None)])
                continue

            size = os.path.getsize(path)
            if size == 0:
                trace_ranges.append([(None, None)])
                continue
            perjob = m.ceil(size / jobs)
            trace_ranges.append(
                [(i, i+perjob) for i in range(0, size, perjob)])

        results = []
        with mp.Pool(jobs) as p:
            for results_ in p.imap_unordered(
                    starapply,
                    ((collect_job, (obj_path, path, start, stop,
                            syms, sym_at, lines, line_at),
                        args)
                        for path, ranges in zip(trace_paths, trace_ranges)
                        for start, stop in ranges)):
                results.extend(results_)
    else:
        results = []
        for path in trace_paths:
            results.extend(collect_job(obj_path, path, None, None,
                syms, sym_at, lines, line_at,
                **args))

    return results
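
# fold results together by the given fields, merging duplicates and
# recursing into children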
def fold(Result, results, *,
        by=None,
        defines=None,
        **_):
    if by is None:
        by = Result._by

    for k in it.chain(by or [], (k for k, _ in defines or [])):
        if k not in Result._by and k not in Result._fields:
            print("error: could not find field %r?" % k)
            sys.exit(-1)

    # filter by matching defines
    if defines is not None:
        results_ = []
        for r in results:
            if all(getattr(r, k) in vs for k, vs in defines):
                results_.append(r)
        results = results_

    # organize results into conflicts
    folding = co.OrderedDict()
    for r in results:
        name = tuple(getattr(r, k) for k in by)
        if name not in folding:
            folding[name] = []
        folding[name].append(r)

    # merge conflicts
    folded = []
    for name, rs in folding.items():
        folded.append(sum(rs[1:], start=rs[0]))

    # fold recursively
    folded_ = []
    for r in folded:
        folded_.append(r._replace(children=fold(
            Result, r.children,
            by=by,
            defines=defines)))
    folded = folded_

    return folded
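
# render a table of results, optionally diffed against diff_results, with
# children rendered as a recursive call tree up to depth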
def table(Result, results, diff_results=None, *,
        by=None,
        fields=None,
        sort=None,
        summary=False,
        all=False,
        percent=False,
        depth=1,
        **_):
    all_, all = all, __builtins__.all

    if by is None:
        by = Result._by
    if fields is None:
        fields = Result._fields
    types = Result._types

    # fold again
    results = fold(Result, results, by=by)
    if diff_results is not None:
        diff_results = fold(Result, diff_results, by=by)

    # organize by name
    table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in results}
    diff_table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in diff_results or []}
    names = list(table.keys() | diff_table.keys())

    # sort again, now with diff info, note that python's sort is stable
    names.sort()
    if diff_results is not None:
        names.sort(key=lambda n: tuple(
                types[k].ratio(
                    getattr(table.get(n), k, None),
                    getattr(diff_table.get(n), k, None))
                for k in fields),
            reverse=True)
    if sort:
        for k, reverse in reversed(sort):
            names.sort(key=lambda n: (getattr(table[n], k),)
                    if getattr(table.get(n), k, None) is not None else (),
                reverse=reverse ^ (not k or k in Result._fields))

    # build up our lines
    lines = []

    # header
    header = []
    header.append('%s%s' % (
            ','.join(by),
            ' (%d added, %d removed)' % (
                    sum(1 for n in table if n not in diff_table),
                    sum(1 for n in diff_table if n not in table))
                if diff_results is not None and not percent else '')
        if not summary else '')
    if diff_results is None:
        for k in fields:
            header.append(k)
    elif percent:
        for k in fields:
            header.append(k)
    else:
        for k in fields:
            header.append('o'+k)
        for k in fields:
            header.append('n'+k)
        for k in fields:
            header.append('d'+k)
    header.append('')
    lines.append(header)

    def table_entry(name, r, diff_r=None, ratios=[]):
        entry = []
        entry.append(name)
        if diff_results is None:
            for k in fields:
                entry.append(getattr(r, k).table()
                    if getattr(r, k, None) is not None
                    else types[k].none)
        elif percent:
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
        else:
            for k in fields:
                entry.append(getattr(diff_r, k).diff_table()
                    if getattr(diff_r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(types[k].diff_diff(
                    getattr(r, k, None),
                    getattr(diff_r, k, None)))
        if diff_results is None:
            entry.append('')
        elif percent:
            entry.append(' (%s)' % ', '.join(
                '+∞%' if t == +m.inf
                else '-∞%' if t == -m.inf
                else '%+.1f%%' % (100*t)
                for t in ratios))
        else:
            entry.append(' (%s)' % ', '.join(
                    '+∞%' if t == +m.inf
                    else '-∞%' if t == -m.inf
                    else '%+.1f%%' % (100*t)
                    for t in ratios
                    if t)
                if any(ratios) else '')
        return entry

    # entries
    if not summary:
        for name in names:
            r = table.get(name)
            if diff_results is None:
                diff_r = None
                ratios = None
            else:
                diff_r = diff_table.get(name)
                ratios = [
                    types[k].ratio(
                        getattr(r, k, None),
                        getattr(diff_r, k, None))
                    for k in fields]
                if not all_ and not any(ratios):
                    continue
            lines.append(table_entry(name, r, diff_r, ratios))

    # total
    r = next(iter(fold(Result, results, by=[])), None)
    if diff_results is None:
        diff_r = None
        ratios = None
    else:
        diff_r = next(iter(fold(Result, diff_results, by=[])), None)
        ratios = [
            types[k].ratio(
                getattr(r, k, None),
                getattr(diff_r, k, None))
            for k in fields]
    lines.append(table_entry('TOTAL', r, diff_r, ratios))

    # find the best widths, note that column 0 contains the names and column -1
    # the ratios, so those are handled a bit differently
    widths = [
        ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
        for w, i in zip(
            it.chain([23], it.repeat(7)),
            range(len(lines[0])-1))]
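    # (this rounds each column width, plus its separating space, up to a
    # multiple of 4)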

    # adjust the name width based on the expected call depth, though
    # note this doesn't really work with unbounded recursion
    if not summary and not m.isinf(depth):
        widths[0] += 4*(depth-1)

    # print the tree recursively
    print('%-*s %s%s' % (
        widths[0], lines[0][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[0][1:-1])),
        lines[0][-1]))

    if not summary:
        def recurse(results_, depth_, prefixes=('', '', '', '')):
            # rebuild our tables at each layer
            table_ = {
                ','.join(str(getattr(r, k) or '') for k in by): r
                for r in results_}
            names_ = list(table_.keys())

            # sort again at each layer, keep in mind the numbers are
            # changing as we descend
            names_.sort()
            if sort:
                for k, reverse in reversed(sort):
                    names_.sort(key=lambda n: (getattr(table_[n], k),)
                            if getattr(table_.get(n), k, None) is not None
                            else (),
                        reverse=reverse ^ (not k or k in Result._fields))

            for i, name in enumerate(names_):
                r = table_[name]
                is_last = (i == len(names_)-1)

                print('%s%-*s %s' % (
                    prefixes[0+is_last],
                    widths[0] - (
                        len(prefixes[0+is_last])
                        if not m.isinf(depth) else 0),
                    name,
                    ' '.join('%*s' % (w, x)
                        for w, x in zip(
                            widths[1:],
                            table_entry(name, r)[1:]))))

                # recurse?
                if depth_ > 1:
                    recurse(
                        r.children,
                        depth_-1,
                        (prefixes[2+is_last] + "|-> ",
                            prefixes[2+is_last] + "'-> ",
                            prefixes[2+is_last] + "|   ",
                            prefixes[2+is_last] + "    "))

        # we have enough going on with diffing to make the top layer
        # a special case
        for name, line in zip(names, lines[1:-1]):
            print('%-*s %s%s' % (
                widths[0], line[0],
                ' '.join('%*s' % (w, x)
                    for w, x in zip(widths[1:], line[1:-1])),
                line[-1]))

            if name in table and depth > 1:
                recurse(
                    table[name].children,
                    depth-1,
                    ("|-> ",
                        "'-> ",
                        "|   ",
                        "    "))

    print('%-*s %s%s' % (
        widths[0], lines[-1][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[-1][1:-1])),
        lines[-1][-1]))
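
# print source files annotated with their perf results, highlighting the
# hottest lines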
def annotate(Result, results, *,
        annotate=None,
        threshold=None,
        read_threshold=None,
        prog_threshold=None,
        erase_threshold=None,
        **args):
    # figure out the thresholds
    if threshold is None:
        threshold = THRESHOLD
    elif len(threshold) == 1:
        threshold = threshold[0], threshold[0]

    if read_threshold is None:
        read_t0, read_t1 = threshold
    elif len(read_threshold) == 1:
        read_t0, read_t1 = read_threshold[0], read_threshold[0]
    else:
        read_t0, read_t1 = read_threshold
    read_t0, read_t1 = min(read_t0, read_t1), max(read_t0, read_t1)

    if prog_threshold is None:
        prog_t0, prog_t1 = threshold
    elif len(prog_threshold) == 1:
        prog_t0, prog_t1 = prog_threshold[0], prog_threshold[0]
    else:
        prog_t0, prog_t1 = prog_threshold
    prog_t0, prog_t1 = min(prog_t0, prog_t1), max(prog_t0, prog_t1)

    if erase_threshold is None:
        erase_t0, erase_t1 = threshold
    elif len(erase_threshold) == 1:
        erase_t0, erase_t1 = erase_threshold[0], erase_threshold[0]
    else:
        erase_t0, erase_t1 = erase_threshold
    erase_t0, erase_t1 = min(erase_t0, erase_t1), max(erase_t0, erase_t1)

    # find maxs
    max_readed = max(it.chain((float(r.readed) for r in results), [1]))
    max_proged = max(it.chain((float(r.proged) for r in results), [1]))
    max_erased = max(it.chain((float(r.erased) for r in results), [1]))

    for path in co.OrderedDict.fromkeys(r.file for r in results).keys():
        # flatten to line info
        results = fold(Result, results, by=['file', 'line'])
        table = {r.line: r for r in results if r.file == path}

        # calculate spans to show
        if not annotate:
            spans = []
            last = None
            func = None
            for line, r in sorted(table.items()):
                if (float(r.readed) / max_readed >= read_t0
                        or float(r.proged) / max_proged >= prog_t0
                        or float(r.erased) / max_erased >= erase_t0):
                    if last is not None and line - last.stop <= args['context']:
                        last = range(
                            last.start,
                            line+1+args['context'])
                    else:
                        if last is not None:
                            spans.append((last, func))
                        last = range(
                            line-args['context'],
                            line+1+args['context'])
                        func = r.function
            if last is not None:
                spans.append((last, func))

        with open(path) as f:
            skipped = False
            for i, line in enumerate(f):
                # skip lines not in spans?
                if not annotate and not any(i+1 in s for s, _ in spans):
                    skipped = True
                    continue

                if skipped:
                    skipped = False
                    print('%s@@ %s:%d: %s @@%s' % (
                        '\x1b[36m' if args['color'] else '',
                        path,
                        i+1,
                        next(iter(f for _, f in spans)),
                        '\x1b[m' if args['color'] else ''))

                # build line
                if line.endswith('\n'):
                    line = line[:-1]

                if i+1 in table:
                    r = table[i+1]
                    line = '%-*s // %s readed, %s proged, %s erased' % (
                        args['width'],
                        line,
                        r.readed,
                        r.proged,
                        r.erased)

                    if args['color']:
                        if (float(r.readed) / max_readed >= read_t1
                                or float(r.proged) / max_proged >= prog_t1
                                or float(r.erased) / max_erased >= erase_t1):
                            line = '\x1b[1;31m%s\x1b[m' % line
                        elif (float(r.readed) / max_readed >= read_t0
                                or float(r.proged) / max_proged >= prog_t0
                                or float(r.erased) / max_erased >= erase_t0):
                            line = '\x1b[35m%s\x1b[m' % line

                print(line)
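
# collect/load, fold, and sort results, then write a CSV file, print a
# table, or print annotated sources depending on flags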
def report(obj_path='', trace_paths=[], *,
        by=None,
        fields=None,
        defines=None,
        sort=None,
        **args):
    # figure out what color should be
    if args.get('color') == 'auto':
        args['color'] = sys.stdout.isatty()
    elif args.get('color') == 'always':
        args['color'] = True
    else:
        args['color'] = False

    # depth of 0 == m.inf
    if args.get('depth') == 0:
        args['depth'] = m.inf

    # find sizes
    if not args.get('use', None):
        results = collect(obj_path, trace_paths, **args)
    else:
        results = []
        with openio(args['use']) as f:
            reader = csv.DictReader(f, restval='')
            for r in reader:
                try:
                    results.append(PerfBdResult(
                        **{k: r[k] for k in PerfBdResult._by
                            if k in r and r[k].strip()},
                        **{k: r['perfbd_'+k] for k in PerfBdResult._fields
                            if 'perfbd_'+k in r and r['perfbd_'+k].strip()}))
                except TypeError:
                    pass

    # fold
    results = fold(PerfBdResult, results, by=by, defines=defines)

    # sort, note that python's sort is stable
    results.sort()
    if sort:
        for k, reverse in reversed(sort):
            results.sort(key=lambda r: (getattr(r, k),)
                    if getattr(r, k) is not None else (),
                reverse=reverse ^ (not k or k in PerfBdResult._fields))

    # write results to CSV
    if args.get('output'):
        with openio(args['output'], 'w') as f:
            writer = csv.DictWriter(f,
                (by if by is not None else PerfBdResult._by)
                + ['perfbd_'+k for k in PerfBdResult._fields])
            writer.writeheader()
            for r in results:
                writer.writerow(
                    {k: getattr(r, k)
                        for k in (by if by is not None else PerfBdResult._by)}
                    | {'perfbd_'+k: getattr(r, k)
                        for k in PerfBdResult._fields})

    # find previous results?
    if args.get('diff'):
        diff_results = []
        try:
            with openio(args['diff']) as f:
                reader = csv.DictReader(f, restval='')
                for r in reader:
                    try:
                        diff_results.append(PerfBdResult(
                            **{k: r[k] for k in PerfBdResult._by
                                if k in r and r[k].strip()},
                            **{k: r['perfbd_'+k] for k in PerfBdResult._fields
                                if 'perfbd_'+k in r
                                    and r['perfbd_'+k].strip()}))
                    except TypeError:
                        pass
        except FileNotFoundError:
            pass

        # fold
        diff_results = fold(PerfBdResult, diff_results, by=by, defines=defines)

    # print table
    if not args.get('quiet'):
        if (args.get('annotate')
                or args.get('threshold')
                or args.get('read_threshold')
                or args.get('prog_threshold')
                or args.get('erase_threshold')):
            # annotate sources
            annotate(PerfBdResult, results, **args)
        else:
            # print table
            table(PerfBdResult, results,
                diff_results if args.get('diff') else None,
                by=by if by is not None else ['function'],
                fields=fields,
                sort=sort,
                **args)

def main(**args):
    return report(**args)

if __name__ == "__main__":
    import argparse
    import sys
    parser = argparse.ArgumentParser(
        description="Aggregate and report call-stack propagated "
            "block-device operations from trace output.",
        allow_abbrev=False)
    parser.add_argument(
        'obj_path',
        nargs='?',
        help="Input executable for mapping addresses to symbols.")
    parser.add_argument(
        'trace_paths',
        nargs='*',
        help="Input *.trace files.")
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument(
        '-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument(
        '-u', '--use',
        help="Don't parse anything, use this CSV file.")
    parser.add_argument(
        '-d', '--diff',
        help="Specify CSV file to diff against.")
    parser.add_argument(
        '-a', '--all',
        action='store_true',
        help="Show all, not just the ones that changed.")
    parser.add_argument(
        '-p', '--percent',
        action='store_true',
        help="Only show percentage change, not a full diff.")
    parser.add_argument(
        '-b', '--by',
        action='append',
        choices=PerfBdResult._by,
        help="Group by this field.")
    parser.add_argument(
        '-f', '--field',
        dest='fields',
        action='append',
        choices=PerfBdResult._fields,
        help="Show this field.")
    parser.add_argument(
        '-D', '--define',
        dest='defines',
        action='append',
        type=lambda x: (lambda k, v: (k, set(v.split(','))))(*x.split('=', 1)),
        help="Only include results where this field is this value.")
    class AppendSort(argparse.Action):
        def __call__(self, parser, namespace, value, option):
            if namespace.sort is None:
                namespace.sort = []
            namespace.sort.append(
                (value, option in {'-S', '--reverse-sort'}))
    parser.add_argument(
        '-s', '--sort',
        action=AppendSort,
        help="Sort by this field.")
    parser.add_argument(
        '-S', '--reverse-sort',
        action=AppendSort,
        help="Sort by this field, but backwards.")
    parser.add_argument(
        '-Y', '--summary',
        action='store_true',
        help="Only show the total.")
    parser.add_argument(
        '-F', '--source',
        dest='sources',
        action='append',
        help="Only consider definitions in this file. Defaults to anything "
            "in the current directory.")
    parser.add_argument(
        '--everything',
        action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument(
        '-P', '--propagate',
        type=lambda x: int(x, 0),
        help="Depth to propagate samples up the call-stack. 0 propagates up "
            "to the entry point, 1 does no propagation. Defaults to 0.")
    parser.add_argument(
        '-Z', '--depth',
        nargs='?',
        type=lambda x: int(x, 0),
        const=0,
        help="Depth of function calls to show. 0 shows all calls but may not "
            "terminate!")
    parser.add_argument(
        '-A', '--annotate',
        action='store_true',
        help="Show source files annotated with coverage info.")
    parser.add_argument(
        '-T', '--threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with any ops above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '--read-threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with reads above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '--prog-threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with progs above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '--erase-threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with erases above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '-c', '--context',
        type=lambda x: int(x, 0),
        default=3,
        help="Show n additional lines of context. Defaults to 3.")
    parser.add_argument(
        '-W', '--width',
        type=lambda x: int(x, 0),
        default=80,
        help="Assume source is styled with this many columns. Defaults to 80.")
    parser.add_argument(
        '--color',
        choices=['never', 'always', 'auto'],
        default='auto',
        help="When to use terminal colors. Defaults to 'auto'.")
    parser.add_argument(
        '-j', '--jobs',
        nargs='?',
        type=lambda x: int(x, 0),
        const=0,
        help="Number of processes to use. 0 spawns one process per core.")
    parser.add_argument(
        '--objdump-tool',
        type=lambda x: x.split(),
        default=OBJDUMP_TOOL,
        help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)
    sys.exit(main(**{k: v
        for k, v in vars(parser.parse_intermixed_args()).items()
        if v is not None}))