| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429 |
- #!/usr/bin/env python3
- #
- # Script to find stack usage at the function level. Will detect recursion and
- # report as infinite stack usage.
- #
import collections as co
import csv
import glob
import itertools as it
import math as m
import os
import re
import sys
# Default glob patterns used to locate callgraph (*.ci) files when no
# ci_paths are given on the command line.
CI_PATHS = ['*.ci']
def openio(path, mode='r'):
    """Open ``path`` as a file, treating ``'-'`` as stdin/stdout.

    For ``'-'`` the standard stream is chosen by ``mode``: stdin if the mode
    contains ``'r'``, stdout otherwise. The stream's file descriptor is
    duplicated, so closing the returned object does not close the
    descriptor the standard stream itself uses. Any other path is passed
    straight to ``open(path, mode)``.
    """
    if path != '-':
        return open(path, mode)

    if 'r' in mode:
        stream, std_mode = sys.stdin, 'r'
    else:
        stream, std_mode = sys.stdout, 'w'
    return os.fdopen(os.dup(stream.fileno()), std_mode)
class StackResult(co.namedtuple('StackResult', 'stack_frame,stack_limit')):
    """Stack measurement for one entry (a function, file, or total).

    Fields:
        stack_frame: frame size, always stored as an int.
        stack_limit: stack limit, always stored as a float; an infinite
            value is rendered as '∞'.
    """
    __slots__ = ()

    # column titles lining up with the layout produced by __str__
    _header = '%7s %7s' % ('frame', 'limit')

    def __new__(cls, stack_frame=0, stack_limit=0):
        # coerce here so string fields can be passed in directly
        frame = int(stack_frame)
        limit = float(stack_limit)
        return super().__new__(cls, frame, limit)

    def __add__(self, other):
        """Combine two results: frames are summed, limits take the max."""
        combined_frame = self.stack_frame + other.stack_frame
        combined_limit = max(self.stack_limit, other.stack_limit)
        return type(self)(combined_frame, combined_limit)

    def __sub__(self, other):
        """``self - other`` is a StackDiff with other as old, self as new."""
        return StackDiff(other, self)

    def __rsub__(self, other):
        """``other - self`` is a StackDiff with self as old, other as new."""
        return StackDiff(self, other)

    def key(self, **args):
        """Return the sort key selected by the sort flags in ``args``.

        The first truthy flag wins, checked in this order: limit_sort,
        reverse_limit_sort, frame_sort, reverse_frame_sort. Non-reversed
        sorts negate the value so larger entries come first. Returns None
        when no flag is set.
        """
        candidates = (
            ('limit_sort', -self.stack_limit),
            ('reverse_limit_sort', +self.stack_limit),
            ('frame_sort', -self.stack_frame),
            ('reverse_frame_sort', +self.stack_frame),
        )
        for flag, value in candidates:
            if args.get(flag):
                return value
        return None

    def __str__(self):
        if m.isinf(self.stack_limit):
            limit = '∞'
        else:
            limit = int(self.stack_limit)
        return '%7d %7s' % (self.stack_frame, limit)
class StackDiff(co.namedtuple('StackDiff', 'old,new')):
    """Difference between an old and a new stack result.

    Fields:
        old: the previous result, or None if the entry was added.
        new: the current result, or None if the entry was removed.

    Both sides only need ``stack_frame``, ``stack_limit`` and ``key()``.
    """
    __slots__ = ()

    # column titles for the layout produced by __str__
    _header = '%15s %15s %15s' % ('old', 'new', 'diff')

    def ratio(self):
        """Return the relative change of the stack limit.

        A missing side counts as a limit of 0. Returns 0.0 when nothing
        changed (including inf -> inf), +/-inf when one side is infinite,
        1.0 when growing from zero, and ``(new-old)/old`` otherwise.
        """
        old_limit = self.old.stack_limit if self.old is not None else 0
        new_limit = self.new.stack_limit if self.new is not None else 0
        return (0.0 if m.isinf(new_limit) and m.isinf(old_limit)
            else +float('inf') if m.isinf(new_limit)
            else -float('inf') if m.isinf(old_limit)
            else 0.0 if not old_limit and not new_limit
            else 1.0 if not old_limit
            else (new_limit-old_limit) / old_limit)

    def key(self, **args):
        """Return a sort key: the new result's key, then largest ratio first.

        The first element is always a number. When no sort flag is set the
        new result's key is None, and removed entries (``new is None``) use
        0; mixing None and 0 would make the keys unorderable, so None is
        normalised to 0 and ordering then falls through to the ratio.
        """
        new_key = self.new.key(**args) if self.new is not None else None
        if new_key is None:
            new_key = 0
        return (new_key, -self.ratio())

    def __bool__(self):
        # a diff is "truthy" only if the stack limit actually changed
        return bool(self.ratio())

    def __str__(self):
        old_frame = self.old.stack_frame if self.old is not None else 0
        old_limit = self.old.stack_limit if self.old is not None else 0
        new_frame = self.new.stack_frame if self.new is not None else 0
        new_limit = self.new.stack_limit if self.new is not None else 0
        diff_frame = new_frame - old_frame
        # inf - inf would be nan, report it as "no change" instead
        diff_limit = (0 if m.isinf(new_limit) and m.isinf(old_limit)
            else new_limit - old_limit)
        ratio = self.ratio()
        return '%7s %7s %7s %7s %+7d %7s%s' % (
            old_frame if self.old is not None else '-',
            ('∞' if m.isinf(old_limit) else int(old_limit))
                if self.old is not None else '-',
            new_frame if self.new is not None else '-',
            ('∞' if m.isinf(new_limit) else int(new_limit))
                if self.new is not None else '-',
            diff_frame,
            '+∞' if diff_limit > 0 and m.isinf(diff_limit)
                else '-∞' if diff_limit < 0 and m.isinf(diff_limit)
                else '%+d' % diff_limit,
            '' if not ratio
                else ' (+∞%)' if ratio > 0 and m.isinf(ratio)
                else ' (-∞%)' if ratio < 0 and m.isinf(ratio)
                else ' (%+.1f%%)' % (100*ratio))
def collect(paths, **args):
    """Parse callgraph files and compute per-function stack usage.

    Args:
        paths: iterable of paths to callgraph files in VCG format.
        **args: options; ``quiet`` suppresses the non-static-stack warning,
            ``everything`` keeps functions whose file starts with ``<`` or
            ``/usr/include``.

    Returns:
        A ``(results, result_calls)`` pair. ``results`` maps
        ``(file, function)`` to a ``StackResult(frame, limit)``, where limit
        is the frame plus the deepest chain of callees, or ``inf`` if a call
        cycle is reachable. ``result_calls`` maps ``(file, function)`` to the
        set of ``(file, function)`` pairs it calls directly.

    Raises:
        AssertionError: if a file is not well-formed VCG.
    """
    # parse the vcg format
    k_pattern = re.compile(r'([a-z]+)\s*:', re.DOTALL)
    v_pattern = re.compile('(?:"(.*?)"|([a-z]+))', re.DOTALL)

    def parse_vcg(rest):
        # parse a run of "key: value" pairs, where a value is either a
        # quoted string, a bare word, or a nested "{ ... }" group
        def parse_node(rest):
            node = []
            while True:
                rest = rest.lstrip()
                key_match = k_pattern.match(rest)
                if not key_match:
                    return (node, rest)
                k, rest = key_match.group(1), rest[key_match.end(0):]

                rest = rest.lstrip()
                if rest.startswith('{'):
                    v, rest = parse_node(rest[1:])
                    # slice instead of index so truncated input fails the
                    # assertion rather than raising IndexError
                    assert rest[0:1] == '}', "unexpected %r" % rest[0:1]
                    rest = rest[1:]
                    node.append((k, v))
                else:
                    value_match = v_pattern.match(rest)
                    assert value_match, "unexpected %r" % rest[0:1]
                    v = value_match.group(1) or value_match.group(2)
                    rest = rest[value_match.end(0):]
                    node.append((k, v))

        node, rest = parse_node(rest)
        assert rest == '', "unexpected %r" % rest[0:1]
        return node

    # collect into functions, title -> (file, function, frame, targets)
    callgraph = co.defaultdict(lambda: (None, None, 0, set()))
    f_pattern = re.compile(
        r'([^\\]*)\\n([^:]*)[^\\]*\\n([0-9]+) bytes \((.*)\)')
    for path in paths:
        with open(path) as f:
            vcg = parse_vcg(f.read())
        for k, graph in vcg:
            if k != 'graph':
                continue
            for k, info in graph:
                if k == 'node':
                    info = dict(info)
                    label_match = f_pattern.match(info['label'])
                    if label_match:
                        function, file, size, kind = label_match.groups()
                        if not args.get('quiet') and kind != 'static':
                            print('warning: found non-static stack for %s (%s)'
                                % (function, kind))
                        # keep any targets already recorded by earlier edges
                        _, _, _, targets = callgraph[info['title']]
                        callgraph[info['title']] = (
                            file, function, int(size), targets)
                elif k == 'edge':
                    info = dict(info)
                    _, _, _, targets = callgraph[info['sourcename']]
                    targets.add(info['targetname'])
                else:
                    continue

    for source, (s_file, _, _, _) in list(callgraph.items()):
        if s_file is None:
            # only ever seen as an edge source, so there is no file,
            # function name or frame size to report
            del callgraph[source]
        elif (not args.get('everything')
                and s_file.startswith(('<', '/usr/include'))):
            # discard internal functions
            del callgraph[source]

    # find maximum stack size recursively, this requires also detecting cycles
    # (in case of recursion)
    def find_limit(source, seen=None):
        seen = seen or set()
        if source not in callgraph:
            return 0
        _, _, frame, targets = callgraph[source]

        limit = 0
        for target in targets:
            if target in seen:
                # found a cycle
                return float('inf')
            limit_ = find_limit(target, seen | {target})
            limit = max(limit, limit_)

        return frame + limit

    def find_calls(targets):
        calls = set()
        for target in targets:
            if target in callgraph:
                t_file, t_function, _, _ = callgraph[target]
                calls.add((t_file, t_function))
        return calls

    # build results
    results = {}
    result_calls = {}
    for source, (s_file, s_function, frame, targets) in callgraph.items():
        limit = find_limit(source)
        calls = find_calls(targets)
        results[(s_file, s_function)] = StackResult(frame, limit)
        result_calls[(s_file, s_function)] = calls

    return results, result_calls
def main(**args):
    """Collect, optionally save and diff, and print stack usage.

    Recognised keys in ``args`` (all optional unless noted):
        ci_paths: paths, directories or globs of callgraph files; required
            unless ``use`` is given.
        use: CSV file to load results from instead of parsing callgraphs.
        diff: CSV file of previous results to diff against; a missing file
            is treated as having no previous results.
        output: CSV file to write results to ('-' for stdout).
        merge: existing CSV whose other columns are kept in ``output``.
        quiet / summary / files: select what is printed.
        all: in diff mode, also show entries that did not change.
        depth: how many levels of callees to print under each entry.
        error_on_recursion: exit with status 2 if any limit is infinite.
    Remaining keys (sort flags, ``everything``, ...) are forwarded to
    ``collect`` and to the results' ``key`` methods.

    Exits with status -1 if no callgraph files are found.
    """
    def read_results(path):
        # load a results CSV, skipping rows missing any StackResult field
        with openio(path) as f:
            reader = csv.DictReader(f)
            return {
                (row['file'], row['name']): StackResult(
                    *(row[field] for field in StackResult._fields))
                for row in reader
                if all(row.get(field) not in {None, ''}
                    for field in StackResult._fields)}

    def entry_for(by):
        # map a (file, function) key to the name it is grouped under
        if by == 'total':
            return lambda k: 'TOTAL'
        elif by == 'file':
            return lambda k: k[0]
        else:
            return lambda k: k[1]

    # find sizes
    if not args.get('use', None):
        # find .ci files
        paths = []
        for path in args['ci_paths']:
            if os.path.isdir(path):
                path = path + '/*.ci'
            paths.extend(glob.glob(path))

        if not paths:
            print('no .ci files found in %r?' % args['ci_paths'])
            sys.exit(-1)

        results, result_calls = collect(paths, **args)
    else:
        results = read_results(args['use'])
        result_calls = {}

    # find previous results?
    if args.get('diff'):
        try:
            prev_results = read_results(args['diff'])
        except FileNotFoundError:
            # must be a dict, it is queried with .keys()/.items() below
            prev_results = {}

    # write results to CSV
    if args.get('output'):
        merged_results = co.defaultdict(lambda: {})
        other_fields = []

        # merge?
        if args.get('merge'):
            try:
                with openio(args['merge']) as f:
                    reader = csv.DictReader(f)
                    for row in reader:
                        file = row.pop('file', '')
                        func = row.pop('name', '')
                        # our own columns are rewritten from results below
                        for field in StackResult._fields:
                            row.pop(field, None)
                        merged_results[(file, func)] = row
                        other_fields = row.keys()
            except FileNotFoundError:
                pass

        for (file, func), result in results.items():
            merged_results[(file, func)].update(result._asdict())

        with openio(args['output'], 'w') as f:
            w = csv.DictWriter(f, ['file', 'name',
                *other_fields, *StackResult._fields])
            w.writeheader()
            for (file, func), result in sorted(merged_results.items()):
                w.writerow({'file': file, 'name': func, **result})

    # print results
    def print_header(by):
        entry = entry_for(by)

        if not args.get('diff'):
            print('%-36s %s' % (by, StackResult._header))
        else:
            # old names come from the previous results, new names from
            # the current ones
            old = {entry(k) for k in prev_results.keys()}
            new = {entry(k) for k in results.keys()}
            print('%-36s %s' % (
                '%s (%d added, %d removed)' % (by,
                        sum(1 for k in new if k not in old),
                        sum(1 for k in old if k not in new))
                    if by else '',
                StackDiff._header))

    def print_entries(by):
        # print optional tree of dependencies
        def print_calls(entries, entry_calls, depth,
                keep=lambda _: True,
                prefixes=('', '', '', ''),
                path=()):
            filtered_entries = {
                name: result for name, result in entries.items()
                if keep(name)}
            for i, (name, result) in enumerate(sorted(filtered_entries.items(),
                    key=lambda p: (p[1].key(**args), p))):
                last = (i == len(filtered_entries)-1)
                print('%-36s %s' % (prefixes[0+last] + name, result))

                # names already on the current path are printed but not
                # expanded again, otherwise a recursive call graph would
                # never terminate when depth is unbounded
                if depth > 0 and by != 'total' and name not in path:
                    calls = entry_calls.get(name, set())
                    print_calls(entries, entry_calls, depth-1,
                        lambda name: name in calls,
                        # continuation prefixes are as wide as the branch
                        # prefixes so nested levels stay aligned
                        (   prefixes[2+last] + "|-> ",
                            prefixes[2+last] + "'-> ",
                            prefixes[2+last] + "|   ",
                            prefixes[2+last] + "    "),
                        path + (name,))

        entry = entry_for(by)

        # build entries
        entries = co.defaultdict(lambda: StackResult())
        for k, result in results.items():
            entries[entry(k)] += result

        entry_calls = co.defaultdict(lambda: set())
        for k, calls in result_calls.items():
            entry_calls[entry(k)] |= {entry(c) for c in calls}

        if not args.get('diff'):
            print_calls(
                entries,
                entry_calls,
                args.get('depth', 0))
        else:
            prev_entries = co.defaultdict(lambda: StackResult())
            for k, result in prev_results.items():
                prev_entries[entry(k)] += result

            # .get() yields None for names missing on one side, which the
            # subtraction turns into an added/removed diff
            diff_entries = {name: entries.get(name) - prev_entries.get(name)
                for name in (entries.keys() | prev_entries.keys())}

            print_calls(
                {name: diff for name, diff in diff_entries.items()
                    if diff or args.get('all')},
                entry_calls,
                args.get('depth', 0))

    if args.get('quiet'):
        pass
    elif args.get('summary'):
        print_header('')
        print_entries('total')
    elif args.get('files'):
        print_header('file')
        print_entries('file')
        print_entries('total')
    else:
        print_header('function')
        print_entries('function')
        print_entries('total')

    # catch recursion
    if args.get('error_on_recursion') and any(
            m.isinf(result.stack_limit) for result in results.values()):
        sys.exit(2)
# Script entry point: build the command-line interface and hand every option
# that was actually provided over to main() as keyword arguments.
if __name__ == "__main__":
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Find stack usage at the function level.")
    parser.add_argument(
        'ci_paths', nargs='*', default=CI_PATHS,
        help=("Description of where to find *.ci files. May be a directory "
              "or a list of paths. Defaults to %r." % CI_PATHS))
    parser.add_argument(
        '-v', '--verbose', action='store_true',
        help="Output commands that run behind the scenes.")
    parser.add_argument(
        '-q', '--quiet', action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument(
        '-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument(
        '-u', '--use',
        help="Don't parse callgraph files, instead use this CSV file.")
    parser.add_argument(
        '-d', '--diff',
        help="Specify CSV file to diff against.")
    parser.add_argument(
        '-m', '--merge',
        help="Merge with an existing CSV file when writing to output.")
    parser.add_argument(
        '-a', '--all', action='store_true',
        help="Show all functions, not just the ones that changed.")
    parser.add_argument(
        '-A', '--everything', action='store_true',
        help="Include builtin and libc specific symbols.")
    parser.add_argument(
        '--frame-sort', action='store_true',
        help="Sort by stack frame size.")
    parser.add_argument(
        '--reverse-frame-sort', action='store_true',
        help="Sort by stack frame size, but backwards.")
    parser.add_argument(
        '-s', '--limit-sort', action='store_true',
        help="Sort by stack limit.")
    parser.add_argument(
        '-S', '--reverse-limit-sort', action='store_true',
        help="Sort by stack limit, but backwards.")
    # a bare -L (no value) means unlimited depth
    parser.add_argument(
        '-L', '--depth', default=0, type=lambda x: int(x, 0),
        nargs='?', const=float('inf'),
        help="Depth of dependencies to show.")
    parser.add_argument(
        '-F', '--files', action='store_true',
        help="Show file-level calls.")
    parser.add_argument(
        '-Y', '--summary', action='store_true',
        help="Only show the total stack size.")
    parser.add_argument(
        '-e', '--error-on-recursion', action='store_true',
        help="Error if any functions are recursive.")
    parser.add_argument(
        '--build-dir',
        help=("Specify the relative build directory. Used to map object files "
              "to the correct source files."))

    # options left at None were not given, so main() never sees them
    parsed = vars(parser.parse_args())
    provided = {name: value
        for name, value in parsed.items()
        if value is not None}
    sys.exit(main(**provided))
|