stack.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647
  1. #!/usr/bin/env python3
  2. #
  3. # Script to find stack usage at the function level. Will detect recursion and
  4. # report as infinite stack usage.
  5. #
  6. import collections as co
  7. import csv
  8. import glob
  9. import itertools as it
  10. import math as m
  11. import os
  12. import re
  13. CI_PATHS = ['*.ci']
  14. # integer fields
  15. class IntField(co.namedtuple('IntField', 'x')):
  16. __slots__ = ()
  17. def __new__(cls, x):
  18. if isinstance(x, IntField):
  19. return x
  20. if isinstance(x, str):
  21. try:
  22. x = int(x, 0)
  23. except ValueError:
  24. # also accept +-∞ and +-inf
  25. if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
  26. x = float('inf')
  27. elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
  28. x = float('-inf')
  29. else:
  30. raise
  31. return super().__new__(cls, x)
  32. def __int__(self):
  33. assert not m.isinf(self.x)
  34. return self.x
  35. def __float__(self):
  36. return float(self.x)
  37. def __str__(self):
  38. if self.x == float('inf'):
  39. return '∞'
  40. elif self.x == float('-inf'):
  41. return '-∞'
  42. else:
  43. return str(self.x)
  44. none = '%7s' % '-'
  45. def table(self):
  46. return '%7s' % (self,)
  47. diff_none = '%7s' % '-'
  48. diff_table = table
  49. def diff_diff(self, other):
  50. new = self.x if self else 0
  51. old = other.x if other else 0
  52. diff = new - old
  53. if diff == float('+inf'):
  54. return '%7s' % '+∞'
  55. elif diff == float('-inf'):
  56. return '%7s' % '-∞'
  57. else:
  58. return '%+7d' % diff
  59. def ratio(self, other):
  60. new = self.x if self else 0
  61. old = other.x if other else 0
  62. if m.isinf(new) and m.isinf(old):
  63. return 0.0
  64. elif m.isinf(new):
  65. return float('+inf')
  66. elif m.isinf(old):
  67. return float('-inf')
  68. elif not old and not new:
  69. return 0.0
  70. elif not old:
  71. return 1.0
  72. else:
  73. return (new-old) / old
  74. def __add__(self, other):
  75. return IntField(self.x + other.x)
  76. def __mul__(self, other):
  77. return IntField(self.x * other.x)
  78. def __lt__(self, other):
  79. return self.x < other.x
  80. def __gt__(self, other):
  81. return self.__class__.__lt__(other, self)
  82. def __le__(self, other):
  83. return not self.__gt__(other)
  84. def __ge__(self, other):
  85. return not self.__lt__(other)
  86. def __truediv__(self, n):
  87. if m.isinf(self.x):
  88. return self
  89. else:
  90. return IntField(round(self.x / n))
  91. # size results
  92. class StackResult(co.namedtuple('StackResult',
  93. 'file,function,stack_frame,stack_limit')):
  94. __slots__ = ()
  95. def __new__(cls, file, function, stack_frame, stack_limit):
  96. return super().__new__(cls, file, function,
  97. IntField(stack_frame), IntField(stack_limit))
  98. def __add__(self, other):
  99. return StackResult(self.file, self.function,
  100. self.stack_frame + other.stack_frame,
  101. max(self.stack_limit, other.stack_limit))
  102. def openio(path, mode='r'):
  103. if path == '-':
  104. if 'r' in mode:
  105. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  106. else:
  107. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  108. else:
  109. return open(path, mode)
  110. def collect(paths, *,
  111. everything=False,
  112. **args):
  113. # parse the vcg format
  114. k_pattern = re.compile('([a-z]+)\s*:', re.DOTALL)
  115. v_pattern = re.compile('(?:"(.*?)"|([a-z]+))', re.DOTALL)
  116. def parse_vcg(rest):
  117. def parse_vcg(rest):
  118. node = []
  119. while True:
  120. rest = rest.lstrip()
  121. m = k_pattern.match(rest)
  122. if not m:
  123. return (node, rest)
  124. k, rest = m.group(1), rest[m.end(0):]
  125. rest = rest.lstrip()
  126. if rest.startswith('{'):
  127. v, rest = parse_vcg(rest[1:])
  128. assert rest[0] == '}', "unexpected %r" % rest[0:1]
  129. rest = rest[1:]
  130. node.append((k, v))
  131. else:
  132. m = v_pattern.match(rest)
  133. assert m, "unexpected %r" % rest[0:1]
  134. v, rest = m.group(1) or m.group(2), rest[m.end(0):]
  135. node.append((k, v))
  136. node, rest = parse_vcg(rest)
  137. assert rest == '', "unexpected %r" % rest[0:1]
  138. return node
  139. # collect into functions
  140. callgraph = co.defaultdict(lambda: (None, None, 0, set()))
  141. f_pattern = re.compile(
  142. r'([^\\]*)\\n([^:]*)[^\\]*\\n([0-9]+) bytes \((.*)\)')
  143. for path in paths:
  144. with open(path) as f:
  145. vcg = parse_vcg(f.read())
  146. for k, graph in vcg:
  147. if k != 'graph':
  148. continue
  149. for k, info in graph:
  150. if k == 'node':
  151. info = dict(info)
  152. m = f_pattern.match(info['label'])
  153. if m:
  154. function, file, size, type = m.groups()
  155. if (not args.get('quiet')
  156. and 'static' not in type
  157. and 'bounded' not in type):
  158. print('warning: found non-static stack for %s (%s)'
  159. % (function, type, size))
  160. _, _, _, targets = callgraph[info['title']]
  161. callgraph[info['title']] = (
  162. file, function, int(size), targets)
  163. elif k == 'edge':
  164. info = dict(info)
  165. _, _, _, targets = callgraph[info['sourcename']]
  166. targets.add(info['targetname'])
  167. else:
  168. continue
  169. if not everything:
  170. for source, (s_file, s_function, _, _) in list(callgraph.items()):
  171. # discard internal functions
  172. if s_file.startswith('<') or s_file.startswith('/usr/include'):
  173. del callgraph[source]
  174. # find maximum stack size recursively, this requires also detecting cycles
  175. # (in case of recursion)
  176. def find_limit(source, seen=None):
  177. seen = seen or set()
  178. if source not in callgraph:
  179. return 0
  180. _, _, frame, targets = callgraph[source]
  181. limit = 0
  182. for target in targets:
  183. if target in seen:
  184. # found a cycle
  185. return float('inf')
  186. limit_ = find_limit(target, seen | {target})
  187. limit = max(limit, limit_)
  188. return frame + limit
  189. def find_calls(targets):
  190. calls = set()
  191. for target in targets:
  192. if target in callgraph:
  193. t_file, t_function, _, _ = callgraph[target]
  194. calls.add((t_file, t_function))
  195. return calls
  196. # build results
  197. results = []
  198. calls = {}
  199. for source, (s_file, s_function, frame, targets) in callgraph.items():
  200. limit = find_limit(source)
  201. cs = find_calls(targets)
  202. results.append(StackResult(s_file, s_function, frame, limit))
  203. calls[(s_file, s_function)] = cs
  204. return results, calls
  205. def fold(results, *,
  206. by=['file', 'function'],
  207. **_):
  208. folding = co.OrderedDict()
  209. for r in results:
  210. name = tuple(getattr(r, k) for k in by)
  211. if name not in folding:
  212. folding[name] = []
  213. folding[name].append(r)
  214. folded = []
  215. for rs in folding.values():
  216. folded.append(sum(rs[1:], start=rs[0]))
  217. return folded
  218. def fold_calls(calls, *,
  219. by=['file', 'function'],
  220. **_):
  221. def by_(name):
  222. file, function = name
  223. return (((file,) if 'file' in by else ())
  224. + ((function,) if 'function' in by else ()))
  225. folded = {}
  226. for name, cs in calls.items():
  227. name = by_(name)
  228. if name not in folded:
  229. folded[name] = set()
  230. folded[name] |= {by_(c) for c in cs}
  231. return folded
  232. def table(results, calls, diff_results=None, *,
  233. by_file=False,
  234. limit_sort=False,
  235. reverse_limit_sort=False,
  236. frame_sort=False,
  237. reverse_frame_sort=False,
  238. summary=False,
  239. all=False,
  240. percent=False,
  241. tree=False,
  242. depth=None,
  243. **_):
  244. all_, all = all, __builtins__.all
  245. # tree doesn't really make sense with depth=0, assume depth=inf
  246. if depth is None:
  247. depth = float('inf') if tree else 0
  248. # fold
  249. results = fold(results, by=['file' if by_file else 'function'])
  250. calls = fold_calls(calls, by=['file' if by_file else 'function'])
  251. if diff_results is not None:
  252. diff_results = fold(diff_results,
  253. by=['file' if by_file else 'function'])
  254. table = {
  255. r.file if by_file else r.function: r
  256. for r in results}
  257. diff_table = {
  258. r.file if by_file else r.function: r
  259. for r in diff_results or []}
  260. # sort, note that python's sort is stable
  261. names = list(table.keys() | diff_table.keys())
  262. names.sort()
  263. if diff_results is not None:
  264. names.sort(key=lambda n: -IntField.ratio(
  265. table[n].stack_frame if n in table else None,
  266. diff_table[n].stack_frame if n in diff_table else None))
  267. if limit_sort:
  268. names.sort(key=lambda n: (table[n].stack_limit,) if n in table else (),
  269. reverse=True)
  270. elif reverse_limit_sort:
  271. names.sort(key=lambda n: (table[n].stack_limit,) if n in table else (),
  272. reverse=False)
  273. elif frame_sort:
  274. names.sort(key=lambda n: (table[n].stack_frame,) if n in table else (),
  275. reverse=True)
  276. elif reverse_frame_sort:
  277. names.sort(key=lambda n: (table[n].stack_frame,) if n in table else (),
  278. reverse=False)
  279. # adjust the name width based on the expected call depth, note that we
  280. # can't always find the depth due to recursion
  281. width = 36 + (4*depth if not m.isinf(depth) else 0)
  282. # print header
  283. if not tree:
  284. print('%-*s' % (width, '%s%s' % (
  285. 'file' if by_file else 'function',
  286. ' (%d added, %d removed)' % (
  287. sum(1 for n in table if n not in diff_table),
  288. sum(1 for n in diff_table if n not in table))
  289. if diff_results is not None and not percent else '')
  290. if not summary else ''),
  291. end='')
  292. if diff_results is None:
  293. print(' %s %s' % (
  294. 'frame'.rjust(len(IntField.none)),
  295. 'limit'.rjust(len(IntField.none))))
  296. elif percent:
  297. print(' %s %s' % (
  298. 'frame'.rjust(len(IntField.diff_none)),
  299. 'limit'.rjust(len(IntField.diff_none))))
  300. else:
  301. print(' %s %s %s %s %s %s' % (
  302. 'oframe'.rjust(len(IntField.diff_none)),
  303. 'olimit'.rjust(len(IntField.diff_none)),
  304. 'nframe'.rjust(len(IntField.diff_none)),
  305. 'nlimit'.rjust(len(IntField.diff_none)),
  306. 'dframe'.rjust(len(IntField.diff_none)),
  307. 'dlimit'.rjust(len(IntField.diff_none))))
  308. # print entries
  309. if not summary:
  310. # print the tree recursively
  311. def table_calls(names_, depth,
  312. prefixes=('', '', '', '')):
  313. for i, name in enumerate(names_):
  314. r = table.get(name)
  315. if diff_results is not None:
  316. diff_r = diff_table.get(name)
  317. ratio = IntField.ratio(
  318. r.stack_limit if r else None,
  319. diff_r.stack_limit if diff_r else None)
  320. if not ratio and not all_:
  321. continue
  322. is_last = (i == len(names_)-1)
  323. print('%-*s' % (width, prefixes[0+is_last] + name), end='')
  324. if tree:
  325. print()
  326. elif diff_results is None:
  327. print(' %s %s' % (
  328. r.stack_frame.table()
  329. if r else IntField.none,
  330. r.stack_limit.table()
  331. if r else IntField.none))
  332. elif percent:
  333. print(' %s %s%s' % (
  334. r.stack_frame.diff_table()
  335. if r else IntField.diff_none,
  336. r.stack_limit.diff_table()
  337. if r else IntField.diff_none,
  338. ' (%s)' % (
  339. '+∞%' if ratio == float('+inf')
  340. else '-∞%' if ratio == float('-inf')
  341. else '%+.1f%%' % (100*ratio))))
  342. else:
  343. print(' %s %s %s %s %s %s%s' % (
  344. diff_r.stack_frame.diff_table()
  345. if diff_r else IntField.diff_none,
  346. diff_r.stack_limit.diff_table()
  347. if diff_r else IntField.diff_none,
  348. r.stack_frame.diff_table()
  349. if r else IntField.diff_none,
  350. r.stack_limit.diff_table()
  351. if r else IntField.diff_none,
  352. IntField.diff_diff(
  353. r.stack_frame if r else None,
  354. diff_r.stack_frame if diff_r else None)
  355. if r or diff_r else IntField.diff_none,
  356. IntField.diff_diff(
  357. r.stack_limit if r else None,
  358. diff_r.stack_limit if diff_r else None)
  359. if r or diff_r else IntField.diff_none,
  360. ' (%s)' % (
  361. '+∞%' if ratio == float('+inf')
  362. else '-∞%' if ratio == float('-inf')
  363. else '%+.1f%%' % (100*ratio))
  364. if ratio else ''))
  365. # recurse?
  366. if depth > 0:
  367. cs = calls.get((name,), set())
  368. table_calls(
  369. [n for n in names if (n,) in cs],
  370. depth-1,
  371. ( prefixes[2+is_last] + "|-> ",
  372. prefixes[2+is_last] + "'-> ",
  373. prefixes[2+is_last] + "| ",
  374. prefixes[2+is_last] + " "))
  375. table_calls(names, depth)
  376. # print total
  377. if not tree:
  378. total = fold(results, by=[])
  379. r = total[0] if total else None
  380. if diff_results is not None:
  381. diff_total = fold(diff_results, by=[])
  382. diff_r = diff_total[0] if diff_total else None
  383. ratio = IntField.ratio(
  384. r.stack_limit if r else None,
  385. diff_r.stack_limit if diff_r else None)
  386. print('%-*s' % (width, 'TOTAL'), end='')
  387. if diff_results is None:
  388. print(' %s %s' % (
  389. r.stack_frame.table()
  390. if r else IntField.none,
  391. r.stack_limit.table()
  392. if r else IntField.none))
  393. elif percent:
  394. print(' %s %s%s' % (
  395. r.stack_frame.diff_table()
  396. if r else IntField.diff_none,
  397. r.stack_limit.diff_table()
  398. if r else IntField.diff_none,
  399. ' (%s)' % (
  400. '+∞%' if ratio == float('+inf')
  401. else '-∞%' if ratio == float('-inf')
  402. else '%+.1f%%' % (100*ratio))))
  403. else:
  404. print(' %s %s %s %s %s %s%s' % (
  405. diff_r.stack_frame.diff_table()
  406. if diff_r else IntField.diff_none,
  407. diff_r.stack_limit.diff_table()
  408. if diff_r else IntField.diff_none,
  409. r.stack_frame.diff_table()
  410. if r else IntField.diff_none,
  411. r.stack_limit.diff_table()
  412. if r else IntField.diff_none,
  413. IntField.diff_diff(
  414. r.stack_frame if r else None,
  415. diff_r.stack_frame if diff_r else None)
  416. if r or diff_r else IntField.diff_none,
  417. IntField.diff_diff(
  418. r.stack_limit if r else None,
  419. diff_r.stack_limit if diff_r else None)
  420. if r or diff_r else IntField.diff_none,
  421. ' (%s)' % (
  422. '+∞%' if ratio == float('+inf')
  423. else '-∞%' if ratio == float('-inf')
  424. else '%+.1f%%' % (100*ratio))
  425. if ratio else ''))
  426. def main(ci_paths, **args):
  427. # find sizes
  428. if not args.get('use', None):
  429. # find .ci files
  430. paths = []
  431. for path in ci_paths:
  432. if os.path.isdir(path):
  433. path = path + '/*.ci'
  434. for path in glob.glob(path):
  435. paths.append(path)
  436. if not paths:
  437. print('no .ci files found in %r?' % ci_paths)
  438. sys.exit(-1)
  439. results, calls = collect(paths, **args)
  440. else:
  441. results = []
  442. with openio(args['use']) as f:
  443. reader = csv.DictReader(f)
  444. for r in reader:
  445. try:
  446. results.append(StackResult(**{
  447. k: v for k, v in r.items()
  448. if k in StackResult._fields}))
  449. except TypeError:
  450. pass
  451. calls = {}
  452. # fold to remove duplicates
  453. results = fold(results)
  454. # sort because why not
  455. results.sort()
  456. # write results to CSV
  457. if args.get('output'):
  458. with openio(args['output'], 'w') as f:
  459. writer = csv.DictWriter(f, StackResult._fields)
  460. writer.writeheader()
  461. for r in results:
  462. writer.writerow(r._asdict())
  463. # find previous results?
  464. if args.get('diff'):
  465. diff_results = []
  466. try:
  467. with openio(args['diff']) as f:
  468. reader = csv.DictReader(f)
  469. for r in reader:
  470. try:
  471. diff_results.append(StackResult(**{
  472. k: v for k, v in r.items()
  473. if k in StackResult._fields}))
  474. except TypeError:
  475. pass
  476. except FileNotFoundError:
  477. pass
  478. # fold to remove duplicates
  479. diff_results = fold(diff_results)
  480. # print table
  481. if not args.get('quiet'):
  482. table(
  483. results,
  484. calls,
  485. diff_results if args.get('diff') else None,
  486. **args)
  487. # error on recursion
  488. if args.get('error_on_recursion') and any(
  489. m.isinf(float(r.stack_limit)) for r in results):
  490. sys.exit(2)
  491. if __name__ == "__main__":
  492. import argparse
  493. import sys
  494. parser = argparse.ArgumentParser(
  495. description="Find stack usage at the function level.")
  496. parser.add_argument(
  497. 'ci_paths',
  498. nargs='*',
  499. default=CI_PATHS,
  500. help="Description of where to find *.ci files. May be a directory "
  501. "or a list of paths. Defaults to %r." % CI_PATHS)
  502. parser.add_argument(
  503. '-v', '--verbose',
  504. action='store_true',
  505. help="Output commands that run behind the scenes.")
  506. parser.add_argument(
  507. '-q', '--quiet',
  508. action='store_true',
  509. help="Don't show anything, useful with -o.")
  510. parser.add_argument(
  511. '-o', '--output',
  512. help="Specify CSV file to store results.")
  513. parser.add_argument(
  514. '-u', '--use',
  515. help="Don't parse anything, use this CSV file.")
  516. parser.add_argument(
  517. '-d', '--diff',
  518. help="Specify CSV file to diff against.")
  519. parser.add_argument(
  520. '-a', '--all',
  521. action='store_true',
  522. help="Show all, not just the ones that changed.")
  523. parser.add_argument(
  524. '-p', '--percent',
  525. action='store_true',
  526. help="Only show percentage change, not a full diff.")
  527. parser.add_argument(
  528. '-t', '--tree',
  529. action='store_true',
  530. help="Only show the function call tree.")
  531. parser.add_argument(
  532. '-b', '--by-file',
  533. action='store_true',
  534. help="Group by file.")
  535. parser.add_argument(
  536. '-s', '--limit-sort',
  537. action='store_true',
  538. help="Sort by stack limit.")
  539. parser.add_argument(
  540. '-S', '--reverse-limit-sort',
  541. action='store_true',
  542. help="Sort by stack limit, but backwards.")
  543. parser.add_argument(
  544. '--frame-sort',
  545. action='store_true',
  546. help="Sort by stack frame.")
  547. parser.add_argument(
  548. '--reverse-frame-sort',
  549. action='store_true',
  550. help="Sort by stack frame, but backwards.")
  551. parser.add_argument(
  552. '-Y', '--summary',
  553. action='store_true',
  554. help="Only show the total size.")
  555. parser.add_argument(
  556. '-L', '--depth',
  557. nargs='?',
  558. type=lambda x: int(x, 0),
  559. const=float('inf'),
  560. help="Depth of function calls to show.")
  561. parser.add_argument(
  562. '-e', '--error-on-recursion',
  563. action='store_true',
  564. help="Error if any functions are recursive.")
  565. parser.add_argument(
  566. '-A', '--everything',
  567. action='store_true',
  568. help="Include builtin and libc specific symbols.")
  569. parser.add_argument(
  570. '--build-dir',
  571. help="Specify the relative build directory. Used to map object files "
  572. "to the correct source files.")
  573. sys.exit(main(**{k: v
  574. for k, v in vars(parser.parse_args()).items()
  575. if v is not None}))