# Source code for compyle.profile

""" Utils for profiling kernels
"""

import csv
import time
from collections import defaultdict
from contextlib import contextmanager
from functools import wraps

from .config import get_config


def _make_default():
    """Return a fresh, zeroed record (call count and total time)."""
    return {'calls': 0, 'time': 0.0}


# Nesting depth of the profiling contexts that are currently open.
_current_level = 0

# Recorded data, indexed as _profile_info[level][name] -> record dict.
# Both layers are created lazily on first access.
_profile_info = defaultdict(lambda: defaultdict(_make_default))


def _record_profile(name, time):
    """Account one call that took `time` to the record for `name`.

    The record is stored under the nesting level that is current when this
    function is called, and is created on first use.
    """
    # NOTE: the `time` argument shadows the `time` module inside this function.
    entry = _profile_info[_current_level][name]
    entry['time'] += time
    entry['calls'] += 1


@contextmanager
def profile_ctx(name):
    """ Context manager that times its body and records it under `name`.

    For profiling a function f, it can be used as follows::

    with profile_ctx('f'):
        f()

    The value bound by ``as`` is the start time from ``time.time()``. The
    nesting level is raised while the body runs and is always restored; if
    the body raises, the exception propagates and nothing is recorded.
    """
    global _current_level
    _current_level += 1
    began = time.time()
    try:
        yield began
        # Only reached when the body finished without raising.
        elapsed = time.time() - began
    finally:
        _current_level -= 1
    # Recorded after the level is restored, i.e. at the caller's level.
    _record_profile(name, elapsed)


def profile(method=None, name=None):
    """Decorator for profiling a function. Can be used as follows::

    @profile
    def f():
        pass


    If explicitly passed a name, with @profile(name='some name'), it will use
    the given name. Otherwise, if the first positional argument has both an
    attribute named like the function (i.e. it looks like the instance the
    method is bound to) and a `name` attribute, that `name` is used.
    Otherwise, it will use the function's qualified name to record the
    profile.

    The returned wrapper keeps the metadata of the wrapped function
    (``__name__``, ``__qualname__``, ``__doc__``, ...).

    """
    def make_wrapper(method):
        # wraps() preserves the identity of the decorated function so that
        # introspection and tracebacks do not report every profiled function
        # as "wrapper".
        @wraps(method)
        def wrapper(*args, **kwargs):
            if name is not None:
                p_name = name
            else:
                # Treat the first positional argument as a possible `self`.
                self = args[0] if args else None
                if hasattr(self, method.__name__) and hasattr(self, 'name'):
                    p_name = self.name
                else:
                    p_name = getattr(method, '__qualname__', method.__name__)
            with profile_ctx(p_name):
                return method(*args, **kwargs)
        return wrapper

    # Support both the bare `@profile` and the `@profile(name=...)` forms.
    if method is None:
        return make_wrapper
    return make_wrapper(method)


class ProfileContext:
    """Low-level profiling context that is started and stopped by hand.

    Timing starts when the object is created and is recorded under `name`
    when `stop` is called. This is typically useful in Cython code where
    decorators are not usable and using a context manager makes the code hard
    to read.

    Example
    -------

    p = ProfileContext('some_func')
    do_something()
    p.stop()

    """
    def __init__(self, name):
        global _current_level
        self.name = name
        # Anything profiled until stop() is called is one level deeper.
        _current_level += 1
        self.start = time.time()

    def stop(self):
        """Restore the nesting level and record the time since creation."""
        global _current_level
        _current_level -= 1
        elapsed = time.time() - self.start
        _record_profile(self.name, elapsed)


def get_profile_info():
    """Return the module-level profile data.

    This is the live mapping (level -> name -> record), not a copy.
    """
    return _profile_info


def print_profile():
    """Print a table of the recorded profile data for levels 0 and 1.

    Within each level the entries are listed by decreasing total time. The
    total reported at the end is the sum of the level 0 times only.
    """
    hr = '-'*70
    print(hr)
    if not _profile_info:
        print("No profiling information available")
        print(hr)
        return
    print("Profiling info:")
    print(
        "{:<6} {:<40} {:<10} {:<10}".format(
            'Level', 'Function', 'N calls', 'Time')
    )
    tot_time = 0
    for level in (0, 1):
        # Look the level up with .get(): indexing the defaultdict would
        # insert an empty entry for a level that was never recorded, and the
        # number of stored levels says nothing about which levels exist.
        level_data = _profile_info.get(level)
        if not level_data:
            continue
        profile_data = sorted(
            level_data.items(), key=lambda x: x[1]['time'],
            reverse=True
        )
        for kernel, data in profile_data:
            print("{:<6} {:<40} {:<10} {:<10.3g}".format(
                level, kernel, data['calls'], data['time'])
            )
            if level == 0:
                tot_time += data['time']
    print("Total profiled time: %g secs" % tot_time)
    print(hr)


def profile2csv(fname, info=None):
    '''Write profile info to a CSV file.

    If the optional info argument is passed, it is used as the profile info.
    The `info` argument is a list, potentially one for each rank (for a
    parallel simulation). Each item maps a level to a mapping of names to
    records having 'calls' and 'time' entries. When `info` is not given, the
    profile info of the current process is written as rank 0.

    The columns are rank, level, function, calls and time. Rows are grouped
    by rank, then by increasing level, and listed by decreasing time within a
    level. Names containing commas or quotes are quoted so that every row has
    exactly five fields.
    '''
    if info is None:
        info = [get_profile_info()]
    with open(fname, 'w') as f:
        # The file is opened in text mode, so a '\n' terminator produces the
        # same line endings as writing the rows by hand would.
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(('rank', 'level', 'function', 'calls', 'time'))
        for rank, pdata in enumerate(info):
            for level in sorted(pdata):
                profile_data = sorted(
                    pdata[level].items(), key=lambda x: x[1]['time'],
                    reverse=True
                )
                for name, data in profile_data:
                    writer.writerow(
                        (rank, level, name, data['calls'], data['time'])
                    )


def profile_kernel(kernel, name, backend=None):
    """For profiling raw PyCUDA/PyOpenCL kernels or cython functions

    If profiling is disabled in the config, `kernel` is returned unchanged.
    Otherwise a wrapper is returned that records every call under `name` and
    exposes the kernel's `get_work_group_info` attribute when it has one.

    What the wrapper does depends on the backend:

    - 'opencl': waits on the event returned by the kernel, records the
      wall-clock time and returns the event.
    - 'cuda': calls the kernel with ``time_kernel=True``, records the value
      it returns as the time, and returns that value.
    - anything else: records the wall-clock time of the call and returns the
      kernel's result.
    """
    from compyle.array import get_backend
    backend = get_backend(backend)

    def _profile_knl(*args, **kwargs):
        if backend == 'opencl':
            start = time.time()
            event = kernel(*args, **kwargs)
            # Wait on the returned event before stopping the clock.
            event.wait()
            end = time.time()
            _record_profile(name, end - start)
            return event
        elif backend == 'cuda':
            exec_time = kernel(*args, **kwargs, time_kernel=True)
            _record_profile(name, exec_time)
            return exec_time
        else:
            start = time.time()
            result = kernel(*args, **kwargs)
            end = time.time()
            _record_profile(name, end - start)
            # Hand back the result so that enabling profiling does not change
            # what callers receive from the kernel.
            return result

    if get_config().profile:
        wgi = getattr(kernel, 'get_work_group_info', None)
        if wgi is not None:
            _profile_knl.get_work_group_info = wgi
        return _profile_knl
    else:
        return kernel


def named_profile(name, backend=None):
    """Decorator for profiling raw PyOpenCL/PyCUDA kernels or cython functions.

    Apply it to a function that returns a raw PyCUDA/PyOpenCL kernel: each
    kernel the function returns is passed through `profile_kernel`. If `name`
    is None, the name of the decorated function is used instead.

    For example::

    @named_profile('prefix_sum')
    def _get_prefix_sum(ctx):
        return GenericScanKernel(ctx, np.int32,
                                 arguments="__global int *ary",
                                 input_expr="ary[i]",
                                 scan_expr="a+b", neutral="0",
                                 output_statement="ary[i] = prev_item")
    """
    from compyle.array import get_backend
    # The backend is resolved once, when the decorator is created.
    backend = get_backend(backend)

    def _decorator(f):
        label = f.__name__ if name is None else name

        def _profiled_kernel_generator(*args, **kwargs):
            return profile_kernel(f(*args, **kwargs), label, backend=backend)

        return _profiled_kernel_generator

    return _decorator