#!/usr/bin/env python3
#
# Aggregate and report call-stack propagated block-device operations
# from trace output.
#
# Example:
# ./scripts/bench.py -ttrace
# ./scripts/perfbd.py trace -j -Flfs.c -Flfs_util.c -Serased -Sproged -Sreaded
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
#
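# Note: bd operations in the trace are expected to look roughly like
# "<file>:<line>:trace: <prefix>bd_read(ctx, block, off, buffer, size)",
# optionally followed by tab-indented "at <addr>" backtrace frames (see
# trace_pattern and frame_pattern below).
#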

import bisect
import collections as co
import csv
import functools as ft
import itertools as it
import math as m
import multiprocessing as mp
import os
import re
import shlex
import subprocess as sp
import sys


OBJDUMP_PATH = ['objdump']
THRESHOLD = (0.5, 0.85)
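# note both of these can be overridden on the command line, e.g.
# --objdump-path=arm-none-eabi-objdump when profiling cross-compiled
# objects, or -T/--threshold for the annotation thresholds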

# integer fields
class Int(co.namedtuple('Int', 'x')):
    __slots__ = ()
    def __new__(cls, x=0):
        if isinstance(x, Int):
            return x
        if isinstance(x, str):
            try:
                x = int(x, 0)
            except ValueError:
                # also accept +-∞ and +-inf
                if re.match(r'^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = m.inf
                elif re.match(r'^\s*-\s*(?:∞|inf)\s*$', x):
                    x = -m.inf
                else:
                    raise
        assert isinstance(x, int) or m.isinf(x), x
        return super().__new__(cls, x)

    def __str__(self):
        if self.x == m.inf:
            return '∞'
        elif self.x == -m.inf:
            return '-∞'
        else:
            return str(self.x)

    def __int__(self):
        assert not m.isinf(self.x)
        return self.x

    def __float__(self):
        return float(self.x)

    none = '%7s' % '-'
    def table(self):
        return '%7s' % (self,)

    diff_none = '%7s' % '-'
    diff_table = table

    def diff_diff(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        diff = new - old
        if diff == +m.inf:
            return '%7s' % '+∞'
        elif diff == -m.inf:
            return '%7s' % '-∞'
        else:
            return '%+7d' % diff

    def ratio(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        if m.isinf(new) and m.isinf(old):
            return 0.0
        elif m.isinf(new):
            return +m.inf
        elif m.isinf(old):
            return -m.inf
        elif not old and not new:
            return 0.0
        elif not old:
            return 1.0
        else:
            return (new-old) / old

    def __add__(self, other):
        return self.__class__(self.x + other.x)

    def __sub__(self, other):
        return self.__class__(self.x - other.x)

    def __mul__(self, other):
        return self.__class__(self.x * other.x)
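
# a rough sketch of how Int behaves (illustrative only):
#   Int('0x10') + Int(2)  => Int(18)
#   str(Int('inf'))       => '∞'
#   Int(1).ratio(Int(0))  => 1.0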

# perf results
class PerfBdResult(co.namedtuple('PerfBdResult', [
        'file', 'function', 'line',
        'readed', 'proged', 'erased',
        'children'])):
    _by = ['file', 'function', 'line']
    _fields = ['readed', 'proged', 'erased']
    _types = {'readed': Int, 'proged': Int, 'erased': Int}

    __slots__ = ()
    def __new__(cls, file='', function='', line=0,
            readed=0, proged=0, erased=0,
            children=[]):
        return super().__new__(cls, file, function, int(Int(line)),
            Int(readed), Int(proged), Int(erased),
            children)

    def __add__(self, other):
        return PerfBdResult(self.file, self.function, self.line,
            self.readed + other.readed,
            self.proged + other.proged,
            self.erased + other.erased,
            self.children + other.children)
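
# note PerfBdResult.children holds propagated callee results, so each
# result is effectively the root of a call tree that table() can
# recurse into via -Z/--depth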

def openio(path, mode='r', buffering=-1):
    # allow '-' for stdin/stdout
    if path == '-':
        if mode == 'r':
            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
        else:
            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
    else:
        return open(path, mode, buffering)
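
# collect_syms_and_lines parses two objdump views: the symbol table
# (objdump -t) and the raw dwarf line-number program
# (objdump --dwarf=rawline), a symbol table line looks roughly like
# this (illustrative, exact columns vary with objdump version):
#
#   000000000001234c g     F .text  0000000000000038 lfs_bd_read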
def collect_syms_and_lines(obj_path, *,
        objdump_path=None,
        **args):
    symbol_pattern = re.compile(
        r'^(?P<addr>[0-9a-fA-F]+)'
            r'\s+.*'
            r'\s+(?P<size>[0-9a-fA-F]+)'
            r'\s+(?P<name>[^\s]+)\s*$')
    line_pattern = re.compile(
        r'^\s+(?:'
            # matches dir/file table
            r'(?P<no>[0-9]+)'
                r'(?:\s+(?P<dir>[0-9]+))?'
                r'\s+.*'
                r'\s+(?P<path>[^\s]+)'
            # matches line opcodes
            r'|' r'\[[^\]]*\]\s+'
                r'(?:'
                    r'(?P<op_special>Special)'
                    r'|' r'(?P<op_copy>Copy)'
                    r'|' r'(?P<op_end>End of Sequence)'
                    r'|' r'File .*?to (?:entry )?(?P<op_file>\d+)'
                    r'|' r'Line .*?to (?P<op_line>[0-9]+)'
                    r'|' r'(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
                    r'|' r'.' r')*'
        r')$', re.IGNORECASE)

    # figure out symbol addresses
    syms = {}
    sym_at = []
    cmd = objdump_path + ['-t', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        m = symbol_pattern.match(line)
        if m:
            name = m.group('name')
            addr = int(m.group('addr'), 16)
            size = int(m.group('size'), 16)
            # ignore zero-sized symbols
            if not size:
                continue
            # note multiple symbols can share a name
            if name not in syms:
                syms[name] = set()
            syms[name].add((addr, size))
            sym_at.append((addr, name, size))
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep largest/first when duplicates
    sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
    sym_at_ = []
    for addr, name, size in sym_at:
        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
            sym_at_.append((addr, name, size))
    sym_at = sym_at_

    # state machine for dwarf line numbers, note that objdump's
    # decodedline seems to have issues with multiple dir/file
    # tables, which is why we need this
    lines = []
    line_at = []
    dirs = {}
    files = {}
    op_file = 1
    op_line = 1
    op_addr = 0
    cmd = objdump_path + ['--dwarf=rawline', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=sp.PIPE if not args.get('verbose') else None,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        m = line_pattern.match(line)
        if m:
            if m.group('no') and not m.group('dir'):
                # found a directory entry
                dirs[int(m.group('no'))] = m.group('path')
            elif m.group('no'):
                # found a file entry
                dir = int(m.group('dir'))
                if dir in dirs:
                    files[int(m.group('no'))] = os.path.join(
                        dirs[dir],
                        m.group('path'))
                else:
                    files[int(m.group('no'))] = m.group('path')
            else:
                # found a state machine update
                if m.group('op_file'):
                    op_file = int(m.group('op_file'), 0)
                if m.group('op_line'):
                    op_line = int(m.group('op_line'), 0)
                if m.group('op_addr'):
                    op_addr = int(m.group('op_addr'), 0)

                if (m.group('op_special')
                        or m.group('op_copy')
                        or m.group('op_end')):
                    file = os.path.abspath(files.get(op_file, '?'))
                    lines.append((file, op_line, op_addr))
                    line_at.append((op_addr, file, op_line))

                if m.group('op_end'):
                    op_file = 1
                    op_line = 1
                    op_addr = 0
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stdout.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep first when duplicates
    lines.sort()
    lines_ = []
    for file, line, addr in lines:
        if len(lines_) == 0 or lines_[-1][0] != file or lines_[-1][1] != line:
            lines_.append((file, line, addr))
    lines = lines_

    # sort and keep first when duplicates
    line_at.sort()
    line_at_ = []
    for addr, file, line in line_at:
        if len(line_at_) == 0 or line_at_[-1][0] != addr:
            line_at_.append((addr, file, line))
    line_at = line_at_

    return syms, sym_at, lines, line_at
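
# collect_job parses one slice of a trace file, where trace lines are
# expected to look roughly like this (illustrative addresses, the "at"
# backtrace frames are tab-indented):
#
#   lfs.c:578:trace: lfs_bd_read(0x7f8000, 0x4, 0x0, 0x7f9000, 0x10)
#       at 0x55d1f1a2b123
#       at 0x55d1f1a2c456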
def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
        sources=None,
        everything=False,
        propagate=0,
        depth=1,
        **args):
    trace_pattern = re.compile(
        r'^(?P<file>[^:]*):(?P<line>[0-9]+):trace:\s*(?P<prefix>[^\s]*?bd_)(?:'
            r'(?P<read>read)\('
                r'\s*(?P<read_ctx>\w+)' r'\s*,'
                r'\s*(?P<read_block>\w+)' r'\s*,'
                r'\s*(?P<read_off>\w+)' r'\s*,'
                r'\s*(?P<read_buffer>\w+)' r'\s*,'
                r'\s*(?P<read_size>\w+)' r'\s*\)'
            r'|' r'(?P<prog>prog)\('
                r'\s*(?P<prog_ctx>\w+)' r'\s*,'
                r'\s*(?P<prog_block>\w+)' r'\s*,'
                r'\s*(?P<prog_off>\w+)' r'\s*,'
                r'\s*(?P<prog_buffer>\w+)' r'\s*,'
                r'\s*(?P<prog_size>\w+)' r'\s*\)'
            r'|' r'(?P<erase>erase)\('
                r'\s*(?P<erase_ctx>\w+)' r'\s*,'
                r'\s*(?P<erase_block>\w+)'
                r'\s*\(\s*(?P<erase_size>\w+)\s*\)' r'\s*\)' r')\s*$')
    frame_pattern = re.compile(
        r'^\s+at (?P<addr>\w+)\s*$')

    # parse all of the trace files for read/prog/erase operations
    last_filtered = False
    last_file = None
    last_line = None
    last_sym = None
    last_readed = 0
    last_proged = 0
    last_erased = 0
    last_stack = []
    last_delta = None
    at_cache = {}
    results = {}

    def commit():
        # fallback to just capturing top-level measurements
        if not last_stack:
            file = last_file
            sym = last_sym
            line = last_line

            # ignore filtered sources
            if sources is not None:
                if not any(
                        os.path.abspath(file) == os.path.abspath(s)
                        for s in sources):
                    return
            else:
                # default to only cwd
                if not everything and not os.path.commonpath([
                        os.getcwd(),
                        os.path.abspath(file)]) == os.getcwd():
                    return

            # simplify path
            if os.path.commonpath([
                    os.getcwd(),
                    os.path.abspath(file)]) == os.getcwd():
                file = os.path.relpath(file)
            else:
                file = os.path.abspath(file)

            results[(file, sym, line)] = (
                last_readed,
                last_proged,
                last_erased,
                {})
        else:
            # tail-recursively propagate measurements
            for i in range(len(last_stack)):
                results_ = results
                for j in reversed(range(i+1)):
                    if i+1-j > depth:
                        break
                    # propagate
                    name = last_stack[j]
                    if name in results_:
                        r, p, e, children = results_[name]
                    else:
                        r, p, e, children = 0, 0, 0, {}
                    results_[name] = (
                        r+last_readed,
                        p+last_proged,
                        e+last_erased,
                        children)
                    # recurse
                    results_ = results_[name][-1]

    with openio(path) as f:
        # try to jump to the middle of the file? we need to step out of
        # utf8-safe mode and then resync with the next newline to avoid
        # parsing half a line
        if start is not None and start > 0:
            fd = f.fileno()
            os.lseek(fd, start, os.SEEK_SET)
            while os.read(fd, 1) not in {b'\n', b'\r', b''}:
                pass
            f = os.fdopen(fd)

        for line in f:
            # we have a lot of data, try to take a few shortcuts,
            # string search is much faster than regex so try to use
            # regex as late as possible.
            if not line.startswith('\t'):
                if last_filtered:
                    commit()
                last_filtered = False

                # done processing our slice?
                if stop is not None:
                    if os.lseek(f.fileno(), 0, os.SEEK_CUR) > stop:
                        break

                if 'trace' in line and 'bd' in line:
                    m = trace_pattern.match(line)
                    if m:
                        last_filtered = True
                        last_file = os.path.abspath(m.group('file'))
                        last_line = int(m.group('line'), 0)
                        last_sym = m.group('prefix')
                        last_readed = 0
                        last_proged = 0
                        last_erased = 0
                        last_stack = []
                        last_delta = None

                        if m.group('read'):
                            last_sym += m.group('read')
                            last_readed += int(m.group('read_size'))
                        elif m.group('prog'):
                            last_sym += m.group('prog')
                            last_proged += int(m.group('prog_size'))
                        elif m.group('erase'):
                            last_sym += m.group('erase')
                            last_erased += int(m.group('erase_size'))
            elif last_filtered:
                m = frame_pattern.match(line)
                if m:
                    addr_ = int(m.group('addr'), 0)

                    # before we can do anything with addr, we need to
                    # reverse ASLR, fortunately we know the file+line of
                    # the first stack frame, so we can use that as a point
                    # of reference
                    if last_delta is None:
                        i = bisect.bisect(lines, (last_file, last_line),
                            key=lambda x: (x[0], x[1]))
                        if i > 0:
                            last_delta = lines[i-1][2] - addr_
                        else:
                            # can't reverse ASLR, give up on backtrace
                            commit()
                            last_filtered = False
                            continue

                    addr = addr_ + last_delta

                    # cached?
                    if addr in at_cache:
                        cached = at_cache[addr]
                        if cached is None:
                            # cache says to skip
                            continue
                        file, sym, line = cached
                    else:
                        # find sym
                        i = bisect.bisect(sym_at, addr, key=lambda x: x[0])
                        # check that we're actually in the sym's size
                        if i > 0 and addr < sym_at[i-1][0] + sym_at[i-1][2]:
                            _, sym, _ = sym_at[i-1]
                        else:
                            sym = hex(addr)

                        # filter out internal/unknown functions
                        if not everything and (
                                sym.startswith('__')
                                or sym.startswith('0')
                                or sym.startswith('-')
                                or sym == '_start'):
                            at_cache[addr] = None
                            continue

                        # find file+line
                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
                        if i > 0:
                            _, file, line = line_at[i-1]
                        elif len(last_stack) == 0:
                            file, line = last_file, last_line
                        else:
                            file, line = re.sub(
                                r'(\.o)?$', '.c', obj_path, 1), 0

                        # ignore filtered sources
                        if sources is not None:
                            if not any(
                                    os.path.abspath(file)
                                        == os.path.abspath(s)
                                    for s in sources):
                                at_cache[addr] = None
                                continue
                        else:
                            # default to only cwd
                            if not everything and not os.path.commonpath([
                                    os.getcwd(),
                                    os.path.abspath(file)]) == os.getcwd():
                                at_cache[addr] = None
                                continue

                        # simplify path
                        if os.path.commonpath([
                                os.getcwd(),
                                os.path.abspath(file)]) == os.getcwd():
                            file = os.path.relpath(file)
                        else:
                            file = os.path.abspath(file)

                        at_cache[addr] = file, sym, line

                    last_stack.append((file, sym, line))

                    # stop propagating?
                    if propagate and len(last_stack) >= propagate:
                        commit()
                        last_filtered = False

        if last_filtered:
            commit()

    # rearrange results into result type
    def to_results(results):
        results_ = []
        for name, (r, p, e, children) in results.items():
            results_.append(PerfBdResult(*name,
                r, p, e,
                children=to_results(children)))
        return results_

    return to_results(results)
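
# mp.Pool only passes a single picklable argument to each job, so
# starapply unpacks a (function, args, kwargs) tuple for it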
def starapply(args):
    f, args, kwargs = args
    return f(*args, **kwargs)

def collect(obj_path, trace_paths, *,
        jobs=None,
        **args):
    # automatic job detection?
    if jobs == 0:
        jobs = len(os.sched_getaffinity(0))

    # find sym/line info to reverse ASLR
    syms, sym_at, lines, line_at = collect_syms_and_lines(obj_path, **args)

    if jobs is not None:
        # try to split up files so that even single files can be processed
        # in parallel
        #
        # this looks naive, since we're splitting up text files by bytes, but
        # we do proper backtrace delimiting in collect_job
        trace_ranges = []
        for path in trace_paths:
            if path == '-':
                trace_ranges.append([(None, None)])
                continue

            size = os.path.getsize(path)
            if size == 0:
                trace_ranges.append([(None, None)])
                continue

            # note m.ceil(size / jobs), not size // jobs, so small files
            # still get a nonzero chunk size
            perjob = m.ceil(size / jobs)
            trace_ranges.append(
                [(i, i+perjob) for i in range(0, size, perjob)])

        results = []
        with mp.Pool(jobs) as p:
            for results_ in p.imap_unordered(
                    starapply,
                    ((collect_job, (path, start, stop,
                            syms, sym_at, lines, line_at),
                        args)
                        for path, ranges in zip(trace_paths, trace_ranges)
                        for start, stop in ranges)):
                results.extend(results_)
    else:
        results = []
        for path in trace_paths:
            results.extend(collect_job(path, None, None,
                syms, sym_at, lines, line_at,
                **args))

    return results
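
# fold merges results that share the same 'by' fields, e.g. folding by
# ['function'] sums the readed/proged/erased counts of every result
# with the same function name, recursing into children as it goes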
def fold(Result, results, *,
        by=None,
        defines=None,
        **_):
    if by is None:
        by = Result._by

    for k in it.chain(by or [], (k for k, _ in defines or [])):
        if k not in Result._by and k not in Result._fields:
            print("error: could not find field %r?" % k)
            sys.exit(-1)

    # filter by matching defines
    if defines is not None:
        results_ = []
        for r in results:
            if all(getattr(r, k) in vs for k, vs in defines):
                results_.append(r)
        results = results_

    # organize results into conflicts
    folding = co.OrderedDict()
    for r in results:
        name = tuple(getattr(r, k) for k in by)
        if name not in folding:
            folding[name] = []
        folding[name].append(r)

    # merge conflicts
    folded = []
    for name, rs in folding.items():
        folded.append(sum(rs[1:], start=rs[0]))

    # fold recursively
    folded_ = []
    for r in folded:
        folded_.append(r._replace(children=fold(
            Result, r.children,
            by=by,
            defines=defines)))
    folded = folded_

    return folded
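
# render results as an aligned table, optionally diffed against a
# previous set of results, and optionally recursing into children to
# show a call tree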
def table(Result, results, diff_results=None, *,
        by=None,
        fields=None,
        sort=None,
        summary=False,
        all=False,
        percent=False,
        depth=1,
        **_):
    all_, all = all, __builtins__.all

    if by is None:
        by = Result._by
    if fields is None:
        fields = Result._fields
    types = Result._types

    # fold again
    results = fold(Result, results, by=by)
    if diff_results is not None:
        diff_results = fold(Result, diff_results, by=by)

    # organize by name
    table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in results}
    diff_table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in diff_results or []}
    names = list(table.keys() | diff_table.keys())

    # sort again, now with diff info, note that python's sort is stable
    names.sort()
    if diff_results is not None:
        names.sort(
            key=lambda n: tuple(
                types[k].ratio(
                    getattr(table.get(n), k, None),
                    getattr(diff_table.get(n), k, None))
                for k in fields),
            reverse=True)
    if sort:
        for k, reverse in reversed(sort):
            names.sort(
                key=lambda n: (getattr(table[n], k),)
                    if getattr(table.get(n), k, None) is not None else (),
                reverse=reverse ^ (not k or k in Result._fields))

    # build up our lines
    lines = []

    # header
    header = []
    header.append('%s%s' % (
        ','.join(by),
        ' (%d added, %d removed)' % (
                sum(1 for n in table if n not in diff_table),
                sum(1 for n in diff_table if n not in table))
            if diff_results is not None and not percent else '')
        if not summary else '')
    if diff_results is None:
        for k in fields:
            header.append(k)
    elif percent:
        for k in fields:
            header.append(k)
    else:
        for k in fields:
            header.append('o'+k)
        for k in fields:
            header.append('n'+k)
        for k in fields:
            header.append('d'+k)
    header.append('')
    lines.append(header)

    def table_entry(name, r, diff_r=None, ratios=[]):
        entry = []
        entry.append(name)
        if diff_results is None:
            for k in fields:
                entry.append(getattr(r, k).table()
                    if getattr(r, k, None) is not None
                    else types[k].none)
        elif percent:
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
        else:
            for k in fields:
                entry.append(getattr(diff_r, k).diff_table()
                    if getattr(diff_r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(getattr(r, k).diff_table()
                    if getattr(r, k, None) is not None
                    else types[k].diff_none)
            for k in fields:
                entry.append(types[k].diff_diff(
                    getattr(r, k, None),
                    getattr(diff_r, k, None)))
        if diff_results is None:
            entry.append('')
        elif percent:
            entry.append(' (%s)' % ', '.join(
                '+∞%' if t == +m.inf
                else '-∞%' if t == -m.inf
                else '%+.1f%%' % (100*t)
                for t in ratios))
        else:
            entry.append(' (%s)' % ', '.join(
                    '+∞%' if t == +m.inf
                    else '-∞%' if t == -m.inf
                    else '%+.1f%%' % (100*t)
                    for t in ratios
                    if t)
                if any(ratios) else '')
        return entry

    # entries
    if not summary:
        for name in names:
            r = table.get(name)
            if diff_results is None:
                diff_r = None
                ratios = None
            else:
                diff_r = diff_table.get(name)
                ratios = [
                    types[k].ratio(
                        getattr(r, k, None),
                        getattr(diff_r, k, None))
                    for k in fields]
                if not all_ and not any(ratios):
                    continue
            lines.append(table_entry(name, r, diff_r, ratios))

    # total
    r = next(iter(fold(Result, results, by=[])), None)
    if diff_results is None:
        diff_r = None
        ratios = None
    else:
        diff_r = next(iter(fold(Result, diff_results, by=[])), None)
        ratios = [
            types[k].ratio(
                getattr(r, k, None),
                getattr(diff_r, k, None))
            for k in fields]
    lines.append(table_entry('TOTAL', r, diff_r, ratios))

    # find the best widths, note that column 0 contains the names and
    # column -1 the ratios, so those are handled a bit differently
    widths = [
        ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
        for w, i in zip(
            it.chain([23], it.repeat(7)),
            range(len(lines[0])-1))]

    # adjust the name width based on the expected call depth, though
    # note this doesn't really work with unbounded recursion
    if not summary and not m.isinf(depth):
        widths[0] += 4*(depth-1)

    # print the tree recursively
    print('%-*s %s%s' % (
        widths[0], lines[0][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[0][1:-1])),
        lines[0][-1]))

    if not summary:
        def recurse(results_, depth_, prefixes=('', '', '', '')):
            # rebuild our tables at each layer
            table_ = {
                ','.join(str(getattr(r, k) or '') for k in by): r
                for r in results_}
            names_ = list(table_.keys())

            # sort again at each layer, keep in mind the numbers are
            # changing as we descend
            names_.sort()
            if sort:
                for k, reverse in reversed(sort):
                    names_.sort(
                        key=lambda n: (getattr(table_[n], k),)
                            if getattr(table_.get(n), k, None) is not None
                            else (),
                        reverse=reverse ^ (not k or k in Result._fields))

            for i, name in enumerate(names_):
                r = table_[name]
                is_last = (i == len(names_)-1)

                print('%s%-*s %s' % (
                    prefixes[0+is_last],
                    widths[0] - (
                        len(prefixes[0+is_last])
                        if not m.isinf(depth) else 0),
                    name,
                    ' '.join('%*s' % (w, x)
                        for w, x in zip(
                            widths[1:],
                            table_entry(name, r)[1:]))))

                # recurse?
                if depth_ > 1:
                    recurse(
                        r.children,
                        depth_-1,
                        (prefixes[2+is_last] + "|-> ",
                            prefixes[2+is_last] + "'-> ",
                            prefixes[2+is_last] + "|   ",
                            prefixes[2+is_last] + "    "))

        # we have enough going on with diffing to make the top layer
        # a special case
        for name, line in zip(names, lines[1:-1]):
            print('%-*s %s%s' % (
                widths[0], line[0],
                ' '.join('%*s' % (w, x)
                    for w, x in zip(widths[1:], line[1:-1])),
                line[-1]))

            if name in table and depth > 1:
                recurse(
                    table[name].children,
                    depth-1,
                    ("|-> ",
                        "'-> ",
                        "|   ",
                        "    "))

    print('%-*s %s%s' % (
        widths[0], lines[-1][0],
        ' '.join('%*s' % (w, x)
            for w, x in zip(widths[1:], lines[-1][1:-1])),
        lines[-1][-1]))
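
# annotate sources with per-line read/prog/erase counts, with the
# default THRESHOLD = (0.5, 0.85) lines with any op count >=50% of the
# observed max are shown (magenta when colored), and lines >=85% are
# highlighted red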
def annotate(Result, results, *,
        annotate=None,
        threshold=None,
        read_threshold=None,
        prog_threshold=None,
        erase_threshold=None,
        **args):
    # figure out the thresholds
    if threshold is None:
        threshold = THRESHOLD
    elif len(threshold) == 1:
        threshold = threshold[0], threshold[0]

    if read_threshold is None:
        read_t0, read_t1 = threshold
    elif len(read_threshold) == 1:
        read_t0, read_t1 = read_threshold[0], read_threshold[0]
    else:
        read_t0, read_t1 = read_threshold
    read_t0, read_t1 = min(read_t0, read_t1), max(read_t0, read_t1)

    if prog_threshold is None:
        prog_t0, prog_t1 = threshold
    elif len(prog_threshold) == 1:
        prog_t0, prog_t1 = prog_threshold[0], prog_threshold[0]
    else:
        prog_t0, prog_t1 = prog_threshold
    prog_t0, prog_t1 = min(prog_t0, prog_t1), max(prog_t0, prog_t1)

    if erase_threshold is None:
        erase_t0, erase_t1 = threshold
    elif len(erase_threshold) == 1:
        erase_t0, erase_t1 = erase_threshold[0], erase_threshold[0]
    else:
        erase_t0, erase_t1 = erase_threshold
    erase_t0, erase_t1 = min(erase_t0, erase_t1), max(erase_t0, erase_t1)

    # find maxes
    max_readed = max(it.chain((float(r.readed) for r in results), [1]))
    max_proged = max(it.chain((float(r.proged) for r in results), [1]))
    max_erased = max(it.chain((float(r.erased) for r in results), [1]))

    for path in co.OrderedDict.fromkeys(r.file for r in results).keys():
        # flatten to line info
        results = fold(Result, results, by=['file', 'line'])
        table = {r.line: r for r in results if r.file == path}

        # calculate spans to show
        if not annotate:
            spans = []
            last = None
            func = None
            for line, r in sorted(table.items()):
                if (float(r.readed) / max_readed >= read_t0
                        or float(r.proged) / max_proged >= prog_t0
                        or float(r.erased) / max_erased >= erase_t0):
                    if last is not None and line - last.stop <= args['context']:
                        last = range(
                            last.start,
                            line+1+args['context'])
                    else:
                        if last is not None:
                            spans.append((last, func))
                        last = range(
                            line-args['context'],
                            line+1+args['context'])
                        func = r.function
            if last is not None:
                spans.append((last, func))

        with open(path) as f:
            skipped = False
            for i, line in enumerate(f):
                # skip lines not in spans?
                if not annotate and not any(i+1 in s for s, _ in spans):
                    skipped = True
                    continue

                if skipped:
                    skipped = False
                    print('%s@@ %s:%d: %s @@%s' % (
                        '\x1b[36m' if args['color'] else '',
                        path,
                        i+1,
                        next(iter(f for _, f in spans)),
                        '\x1b[m' if args['color'] else ''))

                # build line
                if line.endswith('\n'):
                    line = line[:-1]

                if i+1 in table:
                    r = table[i+1]
                    line = '%-*s // %s readed, %s proged, %s erased' % (
                        args['width'],
                        line,
                        r.readed,
                        r.proged,
                        r.erased)

                    if args['color']:
                        if (float(r.readed) / max_readed >= read_t1
                                or float(r.proged) / max_proged >= prog_t1
                                or float(r.erased) / max_erased >= erase_t1):
                            line = '\x1b[1;31m%s\x1b[m' % line
                        elif (float(r.readed) / max_readed >= read_t0
                                or float(r.proged) / max_proged >= prog_t0
                                or float(r.erased) / max_erased >= erase_t0):
                            line = '\x1b[35m%s\x1b[m' % line

                print(line)
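
# report ties everything together: collect (or load with -u), fold,
# optionally write results to CSV, optionally diff against a previous
# CSV, and finally render a table or annotated sources, CSV columns
# are the 'by' fields plus perfbd_readed, perfbd_proged, and
# perfbd_erased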
def report(obj_path='', trace_paths=[], *,
        by=None,
        fields=None,
        defines=None,
        sort=None,
        **args):
    # figure out what color should be
    if args.get('color') == 'auto':
        args['color'] = sys.stdout.isatty()
    elif args.get('color') == 'always':
        args['color'] = True
    else:
        args['color'] = False

    # depth of 0 == m.inf
    if args.get('depth') == 0:
        args['depth'] = m.inf

    # find results
    if not args.get('use', None):
        results = collect(obj_path, trace_paths, **args)
    else:
        results = []
        with openio(args['use']) as f:
            reader = csv.DictReader(f, restval='')
            for r in reader:
                try:
                    results.append(PerfBdResult(
                        **{k: r[k] for k in PerfBdResult._by
                            if k in r and r[k].strip()},
                        **{k: r['perfbd_'+k] for k in PerfBdResult._fields
                            if 'perfbd_'+k in r
                                and r['perfbd_'+k].strip()}))
                except TypeError:
                    pass

    # fold
    results = fold(PerfBdResult, results, by=by, defines=defines)

    # sort, note that python's sort is stable
    results.sort()
    if sort:
        for k, reverse in reversed(sort):
            results.sort(
                key=lambda r: (getattr(r, k),)
                    if getattr(r, k) is not None else (),
                reverse=reverse ^ (not k or k in PerfBdResult._fields))

    # write results to CSV
    if args.get('output'):
        with openio(args['output'], 'w') as f:
            writer = csv.DictWriter(f,
                (by if by is not None else PerfBdResult._by)
                    + ['perfbd_'+k for k in PerfBdResult._fields])
            writer.writeheader()
            for r in results:
                writer.writerow(
                    {k: getattr(r, k)
                        for k in (by if by is not None else PerfBdResult._by)}
                    | {'perfbd_'+k: getattr(r, k)
                        for k in PerfBdResult._fields})

    # find previous results?
    if args.get('diff'):
        diff_results = []
        try:
            with openio(args['diff']) as f:
                reader = csv.DictReader(f, restval='')
                for r in reader:
                    try:
                        diff_results.append(PerfBdResult(
                            **{k: r[k] for k in PerfBdResult._by
                                if k in r and r[k].strip()},
                            **{k: r['perfbd_'+k]
                                for k in PerfBdResult._fields
                                if 'perfbd_'+k in r
                                    and r['perfbd_'+k].strip()}))
                    except TypeError:
                        pass
        except FileNotFoundError:
            pass

        # fold
        diff_results = fold(
            PerfBdResult, diff_results, by=by, defines=defines)

    # print table
    if not args.get('quiet'):
        if (args.get('annotate')
                or args.get('threshold')
                or args.get('read_threshold')
                or args.get('prog_threshold')
                or args.get('erase_threshold')):
            # annotate sources
            annotate(PerfBdResult, results, **args)
        else:
            # print table
            table(PerfBdResult, results,
                diff_results if args.get('diff') else None,
                by=by if by is not None else ['function'],
                fields=fields,
                sort=sort,
                **args)

def main(**args):
    return report(**args)

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description="Aggregate and report call-stack propagated "
            "block-device operations from trace output.",
        allow_abbrev=False)
    parser.add_argument(
        'obj_path',
        nargs='?',
        help="Input executable for mapping addresses to symbols.")
    parser.add_argument(
        'trace_paths',
        nargs='*',
        help="Input *.trace files.")
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument(
        '-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument(
        '-u', '--use',
        help="Don't parse anything, use this CSV file.")
    parser.add_argument(
        '-d', '--diff',
        help="Specify CSV file to diff against.")
    parser.add_argument(
        '-a', '--all',
        action='store_true',
        help="Show all, not just the ones that changed.")
    parser.add_argument(
        '-p', '--percent',
        action='store_true',
        help="Only show percentage change, not a full diff.")
    parser.add_argument(
        '-b', '--by',
        action='append',
        choices=PerfBdResult._by,
        help="Group by this field.")
    parser.add_argument(
        '-f', '--field',
        dest='fields',
        action='append',
        choices=PerfBdResult._fields,
        help="Show this field.")
    parser.add_argument(
        '-D', '--define',
        dest='defines',
        action='append',
        type=lambda x: (lambda k, v: (k, set(v.split(','))))(*x.split('=', 1)),
        help="Only include results where this field is this value.")
    class AppendSort(argparse.Action):
        def __call__(self, parser, namespace, value, option):
            if namespace.sort is None:
                namespace.sort = []
            namespace.sort.append((value, True if option == '-S' else False))
    parser.add_argument(
        '-s', '--sort',
        action=AppendSort,
        help="Sort by this field.")
    parser.add_argument(
        '-S', '--reverse-sort',
        action=AppendSort,
        help="Sort by this field, but backwards.")
    parser.add_argument(
        '-Y', '--summary',
        action='store_true',
        help="Only show the total.")
    parser.add_argument(
        '-F', '--source',
        dest='sources',
        action='append',
        help="Only consider definitions in this file. Defaults to anything "
            "in the current directory.")
    parser.add_argument(
        '--everything',
        action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument(
        '-P', '--propagate',
        type=lambda x: int(x, 0),
        help="Depth to propagate samples up the call-stack. 0 propagates up "
            "to the entry point, 1 does no propagation. Defaults to 0.")
    parser.add_argument(
        '-Z', '--depth',
        nargs='?',
        type=lambda x: int(x, 0),
        const=0,
        help="Depth of function calls to show. 0 shows all calls but may not "
            "terminate!")
    parser.add_argument(
        '-A', '--annotate',
        action='store_true',
        help="Show source files annotated with read/prog/erase info.")
    parser.add_argument(
        '-T', '--threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with any ops above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '--read-threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with reads above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '--prog-threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with progs above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '--erase-threshold',
        nargs='?',
        type=lambda x: tuple(float(x) for x in x.split(',')),
        const=THRESHOLD,
        help="Show lines with erases above this threshold as a percent of "
            "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
        '-c', '--context',
        type=lambda x: int(x, 0),
        default=3,
        help="Show n additional lines of context. Defaults to 3.")
    parser.add_argument(
        '-W', '--width',
        type=lambda x: int(x, 0),
        default=80,
        help="Assume source is styled with this many columns. Defaults to 80.")
    parser.add_argument(
        '--color',
        choices=['never', 'always', 'auto'],
        default='auto',
        help="When to use terminal colors. Defaults to 'auto'.")
    parser.add_argument(
        '-j', '--jobs',
        nargs='?',
        type=lambda x: int(x, 0),
        const=0,
        help="Number of processes to use. 0 spawns one process per core.")
    parser.add_argument(
        '--objdump-path',
        type=lambda x: x.split(),
        default=OBJDUMP_PATH,
        help="Path to the objdump executable, may include flags. "
            "Defaults to %r." % OBJDUMP_PATH)
    sys.exit(main(**{k: v
        for k, v in vars(parser.parse_intermixed_args()).items()
        if v is not None}))