| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344 |
- #!/usr/bin/env python3
- #
- # Script to aggregate and report Linux perf results.
- #
- # Example:
- # ./scripts/perf.py -R -obench.perf ./runners/bench_runner
- # ./scripts/perf.py bench.perf -j -Flfs.c -Flfs_util.c -Scycles
- #
- # Copyright (c) 2022, The littlefs authors.
- # SPDX-License-Identifier: BSD-3-Clause
- #
- import bisect
- import collections as co
- import csv
- import errno
- import fcntl
- import functools as ft
- import itertools as it
- import math as m
- import multiprocessing as mp
- import os
- import re
- import shlex
- import shutil
- import subprocess as sp
- import tempfile
- import zipfile
- # TODO support non-zip perf results?
- PERF_PATH = ['perf']
- PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references'
- PERF_FREQ = 100
- OBJDUMP_PATH = ['objdump']
- THRESHOLD = (0.5, 0.85)
- # integer fields
- class Int(co.namedtuple('Int', 'x')):
- __slots__ = ()
- def __new__(cls, x=0):
- if isinstance(x, Int):
- return x
- if isinstance(x, str):
- try:
- x = int(x, 0)
- except ValueError:
- # also accept +-∞ and +-inf
- if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
- x = m.inf
- elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
- x = -m.inf
- else:
- raise
- assert isinstance(x, int) or m.isinf(x), x
- return super().__new__(cls, x)
- def __str__(self):
- if self.x == m.inf:
- return '∞'
- elif self.x == -m.inf:
- return '-∞'
- else:
- return str(self.x)
- def __int__(self):
- assert not m.isinf(self.x)
- return self.x
- def __float__(self):
- return float(self.x)
- none = '%7s' % '-'
- def table(self):
- return '%7s' % (self,)
- diff_none = '%7s' % '-'
- diff_table = table
- def diff_diff(self, other):
- new = self.x if self else 0
- old = other.x if other else 0
- diff = new - old
- if diff == +m.inf:
- return '%7s' % '+∞'
- elif diff == -m.inf:
- return '%7s' % '-∞'
- else:
- return '%+7d' % diff
- def ratio(self, other):
- new = self.x if self else 0
- old = other.x if other else 0
- if m.isinf(new) and m.isinf(old):
- return 0.0
- elif m.isinf(new):
- return +m.inf
- elif m.isinf(old):
- return -m.inf
- elif not old and not new:
- return 0.0
- elif not old:
- return 1.0
- else:
- return (new-old) / old
- def __add__(self, other):
- return self.__class__(self.x + other.x)
- def __sub__(self, other):
- return self.__class__(self.x - other.x)
- def __mul__(self, other):
- return self.__class__(self.x * other.x)
- # perf results
- class PerfResult(co.namedtuple('PerfResult', [
- 'file', 'function', 'line',
- 'cycles', 'bmisses', 'branches', 'cmisses', 'caches',
- 'children'])):
- _by = ['file', 'function', 'line']
- _fields = ['cycles', 'bmisses', 'branches', 'cmisses', 'caches']
- _sort = ['cycles', 'bmisses', 'cmisses', 'branches', 'caches']
- _types = {
- 'cycles': Int,
- 'bmisses': Int, 'branches': Int,
- 'cmisses': Int, 'caches': Int}
- __slots__ = ()
- def __new__(cls, file='', function='', line=0,
- cycles=0, bmisses=0, branches=0, cmisses=0, caches=0,
- children=[]):
- return super().__new__(cls, file, function, int(Int(line)),
- Int(cycles), Int(bmisses), Int(branches), Int(cmisses), Int(caches),
- children)
- def __add__(self, other):
- return PerfResult(self.file, self.function, self.line,
- self.cycles + other.cycles,
- self.bmisses + other.bmisses,
- self.branches + other.branches,
- self.cmisses + other.cmisses,
- self.caches + other.caches,
- self.children + other.children)
- def openio(path, mode='r', buffering=-1):
- # allow '-' for stdin/stdout
- if path == '-':
- if mode == 'r':
- return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
- else:
- return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
- else:
- return open(path, mode, buffering)
- # run perf as a subprocess, storing measurements into a zip file
- def record(command, *,
- output=None,
- perf_freq=PERF_FREQ,
- perf_period=None,
- perf_events=PERF_EVENTS,
- perf_path=PERF_PATH,
- **args):
- # create a temporary file for perf to write to, as far as I can tell
- # this is strictly needed because perf's pipe-mode only works with stdout
- with tempfile.NamedTemporaryFile('rb') as f:
- # figure out our perf invocation
- perf = perf_path + list(filter(None, [
- 'record',
- '-F%s' % perf_freq
- if perf_freq is not None
- and perf_period is None else None,
- '-c%s' % perf_period
- if perf_period is not None else None,
- '-B',
- '-g',
- '--all-user',
- '-e%s' % perf_events,
- '-o%s' % f.name]))
- # run our command
- try:
- if args.get('verbose'):
- print(' '.join(shlex.quote(c) for c in perf + command))
- err = sp.call(perf + command, close_fds=False)
- except KeyboardInterrupt:
- err = errno.EOWNERDEAD
- # synchronize access
- z = os.open(output, os.O_RDWR | os.O_CREAT)
- fcntl.flock(z, fcntl.LOCK_EX)
- # copy measurements into our zip file
- with os.fdopen(z, 'r+b') as z:
- with zipfile.ZipFile(z, 'a',
- compression=zipfile.ZIP_DEFLATED,
- compresslevel=1) as z:
- with z.open('perf.%d' % os.getpid(), 'w') as g:
- shutil.copyfileobj(f, g)
- # forward the return code
- return err
- # try to only process each dso onceS
- #
- # note this only caches with the non-keyword arguments
- def multiprocessing_cache(f):
- local_cache = {}
- manager = mp.Manager()
- global_cache = manager.dict()
- lock = mp.Lock()
- def multiprocessing_cache(*args, **kwargs):
- # check local cache?
- if args in local_cache:
- return local_cache[args]
- # check global cache?
- with lock:
- if args in global_cache:
- v = global_cache[args]
- local_cache[args] = v
- return v
- # fall back to calling the function
- v = f(*args, **kwargs)
- global_cache[args] = v
- local_cache[args] = v
- return v
- return multiprocessing_cache
- @multiprocessing_cache
- def collect_syms_and_lines(obj_path, *,
- objdump_path=None,
- **args):
- symbol_pattern = re.compile(
- '^(?P<addr>[0-9a-fA-F]+)'
- '\s+.*'
- '\s+(?P<size>[0-9a-fA-F]+)'
- '\s+(?P<name>[^\s]+)\s*$')
- line_pattern = re.compile(
- '^\s+(?:'
- # matches dir/file table
- '(?P<no>[0-9]+)'
- '(?:\s+(?P<dir>[0-9]+))?'
- '\s+.*'
- '\s+(?P<path>[^\s]+)'
- # matches line opcodes
- '|' '\[[^\]]*\]\s+'
- '(?:'
- '(?P<op_special>Special)'
- '|' '(?P<op_copy>Copy)'
- '|' '(?P<op_end>End of Sequence)'
- '|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
- '|' 'Line .*?to (?P<op_line>[0-9]+)'
- '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
- '|' '.' ')*'
- ')$', re.IGNORECASE)
- # figure out symbol addresses and file+line ranges
- syms = {}
- sym_at = []
- cmd = objdump_path + ['-t', obj_path]
- if args.get('verbose'):
- print(' '.join(shlex.quote(c) for c in cmd))
- proc = sp.Popen(cmd,
- stdout=sp.PIPE,
- stderr=sp.PIPE if not args.get('verbose') else None,
- universal_newlines=True,
- errors='replace',
- close_fds=False)
- for line in proc.stdout:
- m = symbol_pattern.match(line)
- if m:
- name = m.group('name')
- addr = int(m.group('addr'), 16)
- size = int(m.group('size'), 16)
- # ignore zero-sized symbols
- if not size:
- continue
- # note multiple symbols can share a name
- if name not in syms:
- syms[name] = set()
- syms[name].add((addr, size))
- sym_at.append((addr, name, size))
- proc.wait()
- if proc.returncode != 0:
- if not args.get('verbose'):
- for line in proc.stderr:
- sys.stdout.write(line)
- # assume no debug-info on failure
- pass
- # sort and keep largest/first when duplicates
- sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
- sym_at_ = []
- for addr, name, size in sym_at:
- if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
- sym_at_.append((addr, name, size))
- sym_at = sym_at_
- # state machine for dwarf line numbers, note that objdump's
- # decodedline seems to have issues with multiple dir/file
- # tables, which is why we need this
- lines = []
- line_at = []
- dirs = {}
- files = {}
- op_file = 1
- op_line = 1
- op_addr = 0
- cmd = objdump_path + ['--dwarf=rawline', obj_path]
- if args.get('verbose'):
- print(' '.join(shlex.quote(c) for c in cmd))
- proc = sp.Popen(cmd,
- stdout=sp.PIPE,
- stderr=sp.PIPE if not args.get('verbose') else None,
- universal_newlines=True,
- errors='replace',
- close_fds=False)
- for line in proc.stdout:
- m = line_pattern.match(line)
- if m:
- if m.group('no') and not m.group('dir'):
- # found a directory entry
- dirs[int(m.group('no'))] = m.group('path')
- elif m.group('no'):
- # found a file entry
- dir = int(m.group('dir'))
- if dir in dirs:
- files[int(m.group('no'))] = os.path.join(
- dirs[dir],
- m.group('path'))
- else:
- files[int(m.group('no'))] = m.group('path')
- else:
- # found a state machine update
- if m.group('op_file'):
- op_file = int(m.group('op_file'), 0)
- if m.group('op_line'):
- op_line = int(m.group('op_line'), 0)
- if m.group('op_addr'):
- op_addr = int(m.group('op_addr'), 0)
- if (m.group('op_special')
- or m.group('op_copy')
- or m.group('op_end')):
- file = os.path.abspath(files.get(op_file, '?'))
- lines.append((file, op_line, op_addr))
- line_at.append((op_addr, file, op_line))
- if m.group('op_end'):
- op_file = 1
- op_line = 1
- op_addr = 0
- proc.wait()
- if proc.returncode != 0:
- if not args.get('verbose'):
- for line in proc.stderr:
- sys.stdout.write(line)
- # assume no debug-info on failure
- pass
- # sort and keep first when duplicates
- lines.sort()
- lines_ = []
- for file, line, addr in lines:
- if len(lines_) == 0 or lines_[-1][0] != file or lines[-1][1] != line:
- lines_.append((file, line, addr))
- lines = lines_
- # sort and keep first when duplicates
- line_at.sort()
- line_at_ = []
- for addr, file, line in line_at:
- if len(line_at_) == 0 or line_at_[-1][0] != addr:
- line_at_.append((addr, file, line))
- line_at = line_at_
- return syms, sym_at, lines, line_at
- def collect_decompressed(path, *,
- perf_path=PERF_PATH,
- sources=None,
- everything=False,
- propagate=0,
- depth=1,
- **args):
- sample_pattern = re.compile(
- '(?P<comm>\w+)'
- '\s+(?P<pid>\w+)'
- '\s+(?P<time>[\w.]+):'
- '\s*(?P<period>\w+)'
- '\s+(?P<event>[^:]+):')
- frame_pattern = re.compile(
- '\s+(?P<addr>\w+)'
- '\s+(?P<sym>[^\s\+]+)(?:\+(?P<off>\w+))?'
- '\s+\((?P<dso>[^\)]+)\)')
- events = {
- 'cycles': 'cycles',
- 'branch-misses': 'bmisses',
- 'branches': 'branches',
- 'cache-misses': 'cmisses',
- 'cache-references': 'caches'}
- # note perf_path may contain extra args
- cmd = perf_path + [
- 'script',
- '-i%s' % path]
- if args.get('verbose'):
- print(' '.join(shlex.quote(c) for c in cmd))
- proc = sp.Popen(cmd,
- stdout=sp.PIPE,
- stderr=sp.PIPE if not args.get('verbose') else None,
- universal_newlines=True,
- errors='replace',
- close_fds=False)
- last_filtered = False
- last_event = ''
- last_period = 0
- last_stack = []
- deltas = co.defaultdict(lambda: {})
- syms_ = co.defaultdict(lambda: {})
- at_cache = {}
- results = {}
- def commit():
- # tail-recursively propagate measurements
- for i in range(len(last_stack)):
- results_ = results
- for j in reversed(range(i+1)):
- if i+1-j > depth:
- break
- # propagate
- name = last_stack[j]
- if name not in results_:
- results_[name] = (co.defaultdict(lambda: 0), {})
- results_[name][0][last_event] += last_period
- # recurse
- results_ = results_[name][1]
- for line in proc.stdout:
- # we need to process a lot of data, so wait to use regex as late
- # as possible
- if not line.startswith('\t'):
- if last_filtered:
- commit()
- last_filtered = False
- if line:
- m = sample_pattern.match(line)
- if m and m.group('event') in events:
- last_filtered = True
- last_event = m.group('event')
- last_period = int(m.group('period'), 0)
- last_stack = []
- elif last_filtered:
- m = frame_pattern.match(line)
- if m:
- # filter out internal/kernel functions
- if not everything and (
- m.group('sym').startswith('__')
- or m.group('sym').startswith('0')
- or m.group('sym').startswith('-')
- or m.group('sym').startswith('[')
- or m.group('dso').startswith('/usr/lib')):
- continue
- dso = m.group('dso')
- sym = m.group('sym')
- off = int(m.group('off'), 0) if m.group('off') else 0
- addr_ = int(m.group('addr'), 16)
- # get the syms/lines for the dso, this is cached
- syms, sym_at, lines, line_at = collect_syms_and_lines(
- dso,
- **args)
- # ASLR is tricky, we have symbols+offsets, but static symbols
- # means we may have multiple options for each symbol.
- #
- # To try to solve this, we use previous seen symbols to build
- # confidence for the correct ASLR delta. This means we may
- # guess incorrectly for early symbols, but this will only affect
- # a few samples.
- if sym in syms:
- sym_addr_ = addr_ - off
- # track possible deltas?
- for sym_addr, size in syms[sym]:
- delta = sym_addr - sym_addr_
- if delta not in deltas[dso]:
- deltas[dso][delta] = sum(
- abs(a_+delta - a)
- for s, (a_, _) in syms_[dso].items()
- for a, _ in syms[s])
- for delta in deltas[dso].keys():
- deltas[dso][delta] += abs(sym_addr_+delta - sym_addr)
- syms_[dso][sym] = sym_addr_, size
- # guess the best delta
- delta, _ = min(deltas[dso].items(),
- key=lambda x: (x[1], x[0]))
- addr = addr_ + delta
- # cached?
- if (dso,addr) in at_cache:
- cached = at_cache[(dso,addr)]
- if cached is None:
- # cache says to skip
- continue
- file, line = cached
- else:
- # find file+line
- i = bisect.bisect(line_at, addr, key=lambda x: x[0])
- if i > 0:
- _, file, line = line_at[i-1]
- else:
- file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
- # ignore filtered sources
- if sources is not None:
- if not any(
- os.path.abspath(file) == os.path.abspath(s)
- for s in sources):
- at_cache[(dso,addr)] = None
- continue
- else:
- # default to only cwd
- if not everything and not os.path.commonpath([
- os.getcwd(),
- os.path.abspath(file)]) == os.getcwd():
- at_cache[(dso,addr)] = None
- continue
- # simplify path
- if os.path.commonpath([
- os.getcwd(),
- os.path.abspath(file)]) == os.getcwd():
- file = os.path.relpath(file)
- else:
- file = os.path.abspath(file)
- at_cache[(dso,addr)] = file, line
- else:
- file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
- last_stack.append((file, sym, line))
- # stop propogating?
- if propagate and len(last_stack) >= propagate:
- commit()
- last_filtered = False
- if last_filtered:
- commit()
- proc.wait()
- if proc.returncode != 0:
- if not args.get('verbose'):
- for line in proc.stderr:
- sys.stdout.write(line)
- sys.exit(-1)
- # rearrange results into result type
- def to_results(results):
- results_ = []
- for name, (r, children) in results.items():
- results_.append(PerfResult(*name,
- **{events[k]: v for k, v in r.items()},
- children=to_results(children)))
- return results_
- return to_results(results)
- def collect_job(path, i, **args):
- # decompress into a temporary file, this is to work around
- # some limitations of perf
- with zipfile.ZipFile(path) as z:
- with z.open(i) as f:
- with tempfile.NamedTemporaryFile('wb') as g:
- shutil.copyfileobj(f, g)
- g.flush()
- return collect_decompressed(g.name, **args)
- def starapply(args):
- f, args, kwargs = args
- return f(*args, **kwargs)
- def collect(perf_paths, *,
- jobs=None,
- **args):
- # automatic job detection?
- if jobs == 0:
- jobs = len(os.sched_getaffinity(0))
- records = []
- for path in perf_paths:
- # each .perf file is actually a zip file containing perf files from
- # multiple runs
- with zipfile.ZipFile(path) as z:
- records.extend((path, i) for i in z.infolist())
- # we're dealing with a lot of data but also surprisingly
- # parallelizable
- if jobs is not None:
- results = []
- with mp.Pool(jobs) as p:
- for results_ in p.imap_unordered(
- starapply,
- ((collect_job, (path, i), args) for path, i in records)):
- results.extend(results_)
- else:
- results = []
- for path, i in records:
- results.extend(collect_job(path, i, **args))
- return results
- def fold(Result, results, *,
- by=None,
- defines=None,
- **_):
- if by is None:
- by = Result._by
- for k in it.chain(by or [], (k for k, _ in defines or [])):
- if k not in Result._by and k not in Result._fields:
- print("error: could not find field %r?" % k)
- sys.exit(-1)
- # filter by matching defines
- if defines is not None:
- results_ = []
- for r in results:
- if all(getattr(r, k) in vs for k, vs in defines):
- results_.append(r)
- results = results_
- # organize results into conflicts
- folding = co.OrderedDict()
- for r in results:
- name = tuple(getattr(r, k) for k in by)
- if name not in folding:
- folding[name] = []
- folding[name].append(r)
- # merge conflicts
- folded = []
- for name, rs in folding.items():
- folded.append(sum(rs[1:], start=rs[0]))
- # fold recursively
- folded_ = []
- for r in folded:
- folded_.append(r._replace(children=fold(
- Result, r.children,
- by=by,
- defines=defines)))
- folded = folded_
- return folded
- def table(Result, results, diff_results=None, *,
- by=None,
- fields=None,
- sort=None,
- summary=False,
- all=False,
- percent=False,
- depth=1,
- **_):
- all_, all = all, __builtins__.all
- if by is None:
- by = Result._by
- if fields is None:
- fields = Result._fields
- types = Result._types
- # fold again
- results = fold(Result, results, by=by)
- if diff_results is not None:
- diff_results = fold(Result, diff_results, by=by)
- # organize by name
- table = {
- ','.join(str(getattr(r, k) or '') for k in by): r
- for r in results}
- diff_table = {
- ','.join(str(getattr(r, k) or '') for k in by): r
- for r in diff_results or []}
- names = list(table.keys() | diff_table.keys())
- # sort again, now with diff info, note that python's sort is stable
- names.sort()
- if diff_results is not None:
- names.sort(key=lambda n: tuple(
- types[k].ratio(
- getattr(table.get(n), k, None),
- getattr(diff_table.get(n), k, None))
- for k in fields),
- reverse=True)
- if sort:
- for k, reverse in reversed(sort):
- names.sort(
- key=lambda n: tuple(
- (getattr(table[n], k),)
- if getattr(table.get(n), k, None) is not None else ()
- for k in ([k] if k else [
- k for k in Result._sort if k in fields])),
- reverse=reverse ^ (not k or k in Result._fields))
- # build up our lines
- lines = []
- # header
- header = []
- header.append('%s%s' % (
- ','.join(by),
- ' (%d added, %d removed)' % (
- sum(1 for n in table if n not in diff_table),
- sum(1 for n in diff_table if n not in table))
- if diff_results is not None and not percent else '')
- if not summary else '')
- if diff_results is None:
- for k in fields:
- header.append(k)
- elif percent:
- for k in fields:
- header.append(k)
- else:
- for k in fields:
- header.append('o'+k)
- for k in fields:
- header.append('n'+k)
- for k in fields:
- header.append('d'+k)
- header.append('')
- lines.append(header)
- def table_entry(name, r, diff_r=None, ratios=[]):
- entry = []
- entry.append(name)
- if diff_results is None:
- for k in fields:
- entry.append(getattr(r, k).table()
- if getattr(r, k, None) is not None
- else types[k].none)
- elif percent:
- for k in fields:
- entry.append(getattr(r, k).diff_table()
- if getattr(r, k, None) is not None
- else types[k].diff_none)
- else:
- for k in fields:
- entry.append(getattr(diff_r, k).diff_table()
- if getattr(diff_r, k, None) is not None
- else types[k].diff_none)
- for k in fields:
- entry.append(getattr(r, k).diff_table()
- if getattr(r, k, None) is not None
- else types[k].diff_none)
- for k in fields:
- entry.append(types[k].diff_diff(
- getattr(r, k, None),
- getattr(diff_r, k, None)))
- if diff_results is None:
- entry.append('')
- elif percent:
- entry.append(' (%s)' % ', '.join(
- '+∞%' if t == +m.inf
- else '-∞%' if t == -m.inf
- else '%+.1f%%' % (100*t)
- for t in ratios))
- else:
- entry.append(' (%s)' % ', '.join(
- '+∞%' if t == +m.inf
- else '-∞%' if t == -m.inf
- else '%+.1f%%' % (100*t)
- for t in ratios
- if t)
- if any(ratios) else '')
- return entry
- # entries
- if not summary:
- for name in names:
- r = table.get(name)
- if diff_results is None:
- diff_r = None
- ratios = None
- else:
- diff_r = diff_table.get(name)
- ratios = [
- types[k].ratio(
- getattr(r, k, None),
- getattr(diff_r, k, None))
- for k in fields]
- if not all_ and not any(ratios):
- continue
- lines.append(table_entry(name, r, diff_r, ratios))
- # total
- r = next(iter(fold(Result, results, by=[])), None)
- if diff_results is None:
- diff_r = None
- ratios = None
- else:
- diff_r = next(iter(fold(Result, diff_results, by=[])), None)
- ratios = [
- types[k].ratio(
- getattr(r, k, None),
- getattr(diff_r, k, None))
- for k in fields]
- lines.append(table_entry('TOTAL', r, diff_r, ratios))
- # find the best widths, note that column 0 contains the names and column -1
- # the ratios, so those are handled a bit differently
- widths = [
- ((max(it.chain([w], (len(l[i]) for l in lines)))+1+4-1)//4)*4-1
- for w, i in zip(
- it.chain([23], it.repeat(7)),
- range(len(lines[0])-1))]
- # adjust the name width based on the expected call depth, though
- # note this doesn't really work with unbounded recursion
- if not summary and not m.isinf(depth):
- widths[0] += 4*(depth-1)
- # print the tree recursively
- print('%-*s %s%s' % (
- widths[0], lines[0][0],
- ' '.join('%*s' % (w, x)
- for w, x in zip(widths[1:], lines[0][1:-1])),
- lines[0][-1]))
- if not summary:
- def recurse(results_, depth_, prefixes=('', '', '', '')):
- # rebuild our tables at each layer
- table_ = {
- ','.join(str(getattr(r, k) or '') for k in by): r
- for r in results_}
- names_ = list(table_.keys())
- # sort again at each layer, keep in mind the numbers are
- # changing as we descend
- names_.sort()
- if sort:
- for k, reverse in reversed(sort):
- names_.sort(
- key=lambda n: tuple(
- (getattr(table_[n], k),)
- if getattr(table_.get(n), k, None) is not None
- else ()
- for k in ([k] if k else [
- k for k in Result._sort if k in fields])),
- reverse=reverse ^ (not k or k in Result._fields))
- for i, name in enumerate(names_):
- r = table_[name]
- is_last = (i == len(names_)-1)
- print('%s%-*s %s' % (
- prefixes[0+is_last],
- widths[0] - (
- len(prefixes[0+is_last])
- if not m.isinf(depth) else 0),
- name,
- ' '.join('%*s' % (w, x)
- for w, x in zip(
- widths[1:],
- table_entry(name, r)[1:]))))
- # recurse?
- if depth_ > 1:
- recurse(
- r.children,
- depth_-1,
- (prefixes[2+is_last] + "|-> ",
- prefixes[2+is_last] + "'-> ",
- prefixes[2+is_last] + "| ",
- prefixes[2+is_last] + " "))
- # we have enough going on with diffing to make the top layer
- # a special case
- for name, line in zip(names, lines[1:-1]):
- print('%-*s %s%s' % (
- widths[0], line[0],
- ' '.join('%*s' % (w, x)
- for w, x in zip(widths[1:], line[1:-1])),
- line[-1]))
- if name in table and depth > 1:
- recurse(
- table[name].children,
- depth-1,
- ("|-> ",
- "'-> ",
- "| ",
- " "))
- print('%-*s %s%s' % (
- widths[0], lines[-1][0],
- ' '.join('%*s' % (w, x)
- for w, x in zip(widths[1:], lines[-1][1:-1])),
- lines[-1][-1]))
- def annotate(Result, results, *,
- annotate=None,
- threshold=None,
- branches=False,
- caches=False,
- **args):
- # figure out the threshold
- if threshold is None:
- t0, t1 = THRESHOLD
- elif len(threshold) == 1:
- t0, t1 = threshold[0], threshold[0]
- else:
- t0, t1 = threshold
- t0, t1 = min(t0, t1), max(t0, t1)
- if not branches and not caches:
- tk = 'cycles'
- elif branches:
- tk = 'bmisses'
- else:
- tk = 'cmisses'
- # find max cycles
- max_ = max(it.chain((float(getattr(r, tk)) for r in results), [1]))
- for path in co.OrderedDict.fromkeys(r.file for r in results).keys():
- # flatten to line info
- results = fold(Result, results, by=['file', 'line'])
- table = {r.line: r for r in results if r.file == path}
- # calculate spans to show
- if not annotate:
- spans = []
- last = None
- func = None
- for line, r in sorted(table.items()):
- if float(getattr(r, tk)) / max_ >= t0:
- if last is not None and line - last.stop <= args['context']:
- last = range(
- last.start,
- line+1+args['context'])
- else:
- if last is not None:
- spans.append((last, func))
- last = range(
- line-args['context'],
- line+1+args['context'])
- func = r.function
- if last is not None:
- spans.append((last, func))
- with open(path) as f:
- skipped = False
- for i, line in enumerate(f):
- # skip lines not in spans?
- if not annotate and not any(i+1 in s for s, _ in spans):
- skipped = True
- continue
- if skipped:
- skipped = False
- print('%s@@ %s:%d: %s @@%s' % (
- '\x1b[36m' if args['color'] else '',
- path,
- i+1,
- next(iter(f for _, f in spans)),
- '\x1b[m' if args['color'] else ''))
- # build line
- if line.endswith('\n'):
- line = line[:-1]
- r = table.get(i+1)
- if r is not None and (
- float(r.cycles) > 0
- if not branches and not caches
- else float(r.bmisses) > 0 or float(r.branches) > 0
- if branches
- else float(r.cmisses) > 0 or float(r.caches) > 0):
- line = '%-*s // %s' % (
- args['width'],
- line,
- '%s cycles' % r.cycles
- if not branches and not caches
- else '%s bmisses, %s branches' % (r.bmisses, r.branches)
- if branches
- else '%s cmisses, %s caches' % (r.cmisses, r.caches))
- if args['color']:
- if float(getattr(r, tk)) / max_ >= t1:
- line = '\x1b[1;31m%s\x1b[m' % line
- elif float(getattr(r, tk)) / max_ >= t0:
- line = '\x1b[35m%s\x1b[m' % line
- print(line)
- def report(perf_paths, *,
- by=None,
- fields=None,
- defines=None,
- sort=None,
- branches=False,
- caches=False,
- **args):
- # figure out what color should be
- if args.get('color') == 'auto':
- args['color'] = sys.stdout.isatty()
- elif args.get('color') == 'always':
- args['color'] = True
- else:
- args['color'] = False
- # depth of 0 == m.inf
- if args.get('depth') == 0:
- args['depth'] = m.inf
- # find sizes
- if not args.get('use', None):
- results = collect(perf_paths, **args)
- else:
- results = []
- with openio(args['use']) as f:
- reader = csv.DictReader(f, restval='')
- for r in reader:
- if not any('perf_'+k in r and r['perf_'+k].strip()
- for k in PerfResult._fields):
- continue
- try:
- results.append(PerfResult(
- **{k: r[k] for k in PerfResult._by
- if k in r and r[k].strip()},
- **{k: r['perf_'+k] for k in PerfResult._fields
- if 'perf_'+k in r and r['perf_'+k].strip()}))
- except TypeError:
- pass
- # fold
- results = fold(PerfResult, results, by=by, defines=defines)
- # sort, note that python's sort is stable
- results.sort()
- if sort:
- for k, reverse in reversed(sort):
- results.sort(
- key=lambda r: tuple(
- (getattr(r, k),) if getattr(r, k) is not None else ()
- for k in ([k] if k else PerfResult._sort)),
- reverse=reverse ^ (not k or k in PerfResult._fields))
- # write results to CSV
- if args.get('output'):
- with openio(args['output'], 'w') as f:
- writer = csv.DictWriter(f,
- (by if by is not None else PerfResult._by)
- + ['perf_'+k for k in (
- fields if fields is not None else PerfResult._fields)])
- writer.writeheader()
- for r in results:
- writer.writerow(
- {k: getattr(r, k) for k in (
- by if by is not None else PerfResult._by)}
- | {'perf_'+k: getattr(r, k) for k in (
- fields if fields is not None else PerfResult._fields)})
- # find previous results?
- if args.get('diff'):
- diff_results = []
- try:
- with openio(args['diff']) as f:
- reader = csv.DictReader(f, restval='')
- for r in reader:
- if not any('perf_'+k in r and r['perf_'+k].strip()
- for k in PerfResult._fields):
- continue
- try:
- diff_results.append(PerfResult(
- **{k: r[k] for k in PerfResult._by
- if k in r and r[k].strip()},
- **{k: r['perf_'+k] for k in PerfResult._fields
- if 'perf_'+k in r and r['perf_'+k].strip()}))
- except TypeError:
- pass
- except FileNotFoundError:
- pass
- # fold
- diff_results = fold(PerfResult, diff_results, by=by, defines=defines)
- # print table
- if not args.get('quiet'):
- if args.get('annotate') or args.get('threshold'):
- # annotate sources
- annotate(PerfResult, results,
- branches=branches,
- caches=caches,
- **args)
- else:
- # print table
- table(PerfResult, results,
- diff_results if args.get('diff') else None,
- by=by if by is not None else ['function'],
- fields=fields if fields is not None
- else ['cycles'] if not branches and not caches
- else ['bmisses', 'branches'] if branches
- else ['cmisses', 'caches'],
- sort=sort,
- **args)
- def main(**args):
- if args.get('record'):
- return record(**args)
- else:
- return report(**args)
- if __name__ == "__main__":
- import argparse
- import sys
- # bit of a hack, but parse_intermixed_args and REMAINDER are
- # incompatible, so we need to figure out what we want before running
- # argparse
- if '-R' in sys.argv or '--record' in sys.argv:
- nargs = argparse.REMAINDER
- else:
- nargs = '*'
- argparse.ArgumentParser._handle_conflict_ignore = lambda *_: None
- argparse._ArgumentGroup._handle_conflict_ignore = lambda *_: None
- parser = argparse.ArgumentParser(
- description="Aggregate and report Linux perf results.",
- allow_abbrev=False,
- conflict_handler='ignore')
- parser.add_argument(
- 'perf_paths',
- nargs=nargs,
- help="Input *.perf files.")
- parser.add_argument(
- '-v', '--verbose',
- action='store_true',
- help="Output commands that run behind the scenes.")
- parser.add_argument(
- '-q', '--quiet',
- action='store_true',
- help="Don't show anything, useful with -o.")
- parser.add_argument(
- '-o', '--output',
- help="Specify CSV file to store results.")
- parser.add_argument(
- '-u', '--use',
- help="Don't parse anything, use this CSV file.")
- parser.add_argument(
- '-d', '--diff',
- help="Specify CSV file to diff against.")
- parser.add_argument(
- '-a', '--all',
- action='store_true',
- help="Show all, not just the ones that changed.")
- parser.add_argument(
- '-p', '--percent',
- action='store_true',
- help="Only show percentage change, not a full diff.")
- parser.add_argument(
- '-b', '--by',
- action='append',
- choices=PerfResult._by,
- help="Group by this field.")
- parser.add_argument(
- '-f', '--field',
- dest='fields',
- action='append',
- choices=PerfResult._fields,
- help="Show this field.")
- parser.add_argument(
- '-D', '--define',
- dest='defines',
- action='append',
- type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
- help="Only include results where this field is this value.")
- class AppendSort(argparse.Action):
- def __call__(self, parser, namespace, value, option):
- if namespace.sort is None:
- namespace.sort = []
- namespace.sort.append((value, True if option == '-S' else False))
- parser.add_argument(
- '-s', '--sort',
- nargs='?',
- action=AppendSort,
- help="Sort by this field.")
- parser.add_argument(
- '-S', '--reverse-sort',
- nargs='?',
- action=AppendSort,
- help="Sort by this field, but backwards.")
- parser.add_argument(
- '-Y', '--summary',
- action='store_true',
- help="Only show the total.")
- parser.add_argument(
- '-F', '--source',
- dest='sources',
- action='append',
- help="Only consider definitions in this file. Defaults to anything "
- "in the current directory.")
- parser.add_argument(
- '--everything',
- action='store_true',
- help="Include builtin and libc specific symbols.")
- parser.add_argument(
- '--branches',
- action='store_true',
- help="Show branches and branch misses.")
- parser.add_argument(
- '--caches',
- action='store_true',
- help="Show cache accesses and cache misses.")
- parser.add_argument(
- '-P', '--propagate',
- type=lambda x: int(x, 0),
- help="Depth to propagate samples up the call-stack. 0 propagates up "
- "to the entry point, 1 does no propagation. Defaults to 0.")
- parser.add_argument(
- '-Z', '--depth',
- nargs='?',
- type=lambda x: int(x, 0),
- const=0,
- help="Depth of function calls to show. 0 shows all calls but may not "
- "terminate!")
- parser.add_argument(
- '-A', '--annotate',
- action='store_true',
- help="Show source files annotated with coverage info.")
- parser.add_argument(
- '-T', '--threshold',
- nargs='?',
- type=lambda x: tuple(float(x) for x in x.split(',')),
- const=THRESHOLD,
- help="Show lines with samples above this threshold as a percent of "
- "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
- parser.add_argument(
- '-c', '--context',
- type=lambda x: int(x, 0),
- default=3,
- help="Show n additional lines of context. Defaults to 3.")
- parser.add_argument(
- '-W', '--width',
- type=lambda x: int(x, 0),
- default=80,
- help="Assume source is styled with this many columns. Defaults to 80.")
- parser.add_argument(
- '--color',
- choices=['never', 'always', 'auto'],
- default='auto',
- help="When to use terminal colors. Defaults to 'auto'.")
- parser.add_argument(
- '-j', '--jobs',
- nargs='?',
- type=lambda x: int(x, 0),
- const=0,
- help="Number of processes to use. 0 spawns one process per core.")
- parser.add_argument(
- '--perf-path',
- type=lambda x: x.split(),
- help="Path to the perf executable, may include flags. "
- "Defaults to %r." % PERF_PATH)
- parser.add_argument(
- '--objdump-path',
- type=lambda x: x.split(),
- default=OBJDUMP_PATH,
- help="Path to the objdump executable, may include flags. "
- "Defaults to %r." % OBJDUMP_PATH)
- # record flags
- record_parser = parser.add_argument_group('record options')
- record_parser.add_argument(
- 'command',
- nargs=nargs,
- help="Command to run.")
- record_parser.add_argument(
- '-R', '--record',
- action='store_true',
- help="Run a command and aggregate perf measurements.")
- record_parser.add_argument(
- '-o', '--output',
- help="Output file. Uses flock to synchronize. This is stored as a "
- "zip-file of multiple perf results.")
- record_parser.add_argument(
- '--perf-freq',
- help="perf sampling frequency. This is passed directly to perf. "
- "Defaults to %r." % PERF_FREQ)
- record_parser.add_argument(
- '--perf-period',
- help="perf sampling period. This is passed directly to perf.")
- record_parser.add_argument(
- '--perf-events',
- help="perf events to record. This is passed directly to perf. "
- "Defaults to %r." % PERF_EVENTS)
- record_parser.add_argument(
- '--perf-path',
- type=lambda x: x.split(),
- help="Path to the perf executable, may include flags. "
- "Defaults to %r." % PERF_PATH)
- # avoid intermixed/REMAINDER conflict, see above
- if nargs == argparse.REMAINDER:
- args = parser.parse_args()
- else:
- args = parser.parse_intermixed_args()
- # perf_paths/command overlap, so need to do some munging here
- args.command = args.perf_paths
- if args.record:
- if not args.command:
- print('error: no command specified?')
- sys.exit(-1)
- if not args.output:
- print('error: no output file specified?')
- sys.exit(-1)
- sys.exit(main(**{k: v
- for k, v in vars(args).items()
- if v is not None}))
|