#!/usr/bin/env python3
# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
# -*- coding: utf-8 -*-
#
# Copyright (C) 2017 Intel Corporation
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# Author: Dale Schouten

from sys import stderr, version_info
assert version_info >= (3, 6), 'Script requires Python 3.6 or higher'

import pandas as pd
assert [int(i) for i in pd.__version__.split('.')] >= [0,23,1], 'Script requires pandas 0.23.1 or higher'

import matplotlib as mpl
from matplotlib import pyplot as plt
import argparse
from pathlib import Path
from functools import wraps
from mimetypes import guess_type

common_defaults = {
   'data_files': None,          # List of input csv files
   'config': [],                # Config file
   'columns': [],               # list of 1 or 2 columns
   'nrows': None,               # Number of rows of input to read
   'skiprows': 0,               # Number of rows to skip at beginning of file
   'title': '',                 # Title to appear at top of graph
   'title_prefix': '',          # Prefix to prepend to title
   'title_suffix': '',          # Suffix to append to title
   'subtitle': None,            # Optional subtitle under title
   'confidential': True,        # Print 'Intel Confidential' if True
   'display': False,            # Show plot interactively
   'save_image': True,          # Save the plot to an image file
   'image_file_prefix': None,   # Optional prefix for image file name
   'ymin': None,                # Y axis minimum
   'ymax': None,                # Y axis maximum
   'logy': True,                # Set logy for graph_style 3
   'output_path': Path(),       # save files in current dir by default
   'units': 'ns',               # Input units, i.e. numbers are assumed to be ns
   'data_start': '<START_OF_MEASUREMENTS>', # Marker to indicate where data
                  # starts. This line should immediately precede header row
   'data_end': '<END_OF_MEASUREMENTS>', # Marker to indicate where data ends.
                  # This line should immediately follow the last line of data
   'error_check': True,         # Perform error checks on inputs and calculations
}

special_defaults = {
   'nbins': None,               # Number of bins to use in histogram
   'save_deltas': None,         # Filename to save calculated deltas
   'color': 'blue',             # Color of plot
   'hist_pos': 'left',          # For style 3, histogram on right or left
   'maxline': False,            # Add an annotated line at the max measurement
   'grid_style': 'white',       # Grid-line color: 'white' or 'gray'; horizontal only
   'summary': False,            # Print summary stats to stdout
   'summary_only': False,       # Print summary stats only, skip graphs
}

defaults = {**common_defaults, **special_defaults}

###
### Return subset of dict 'd', corresponding to the keys passed in.
### 'keys' can be any iterable of keys, including another dict.
### All the keys passed in need to be present in d.
###
def select(d, keys):
   return {k:d[k] for k in keys}
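
# For example (hypothetical values):
#   select({'a': 1, 'b': 2, 'c': 3}, ['a', 'c'])   # -> {'a': 1, 'c': 3}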

###
### Decorator to allow functions to be called with filenames or list of names
###
### If the first parameter is a string or pathlib.Path, it will be
### encased in a list; otherwise it will be passed on untouched.
###
def expecting_list(fcn):
   """Decorator to allow functions to be called with filenames or list of names
   If the first parameter is a string or pathlib.Path, it will be
   encased in a list; otherwise it will be passed on untouched.
   """

   @wraps(fcn)
   def wrapper(flist, *args, **kwargs):
      if isinstance(flist, (str, Path)):
            return fcn([flist], *args, **kwargs)
      else:
            return fcn(flist, *args, **kwargs)
   return wrapper
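
# For example, with a hypothetical decorated function:
#   @expecting_list
#   def count_files(flist):
#      return len(flist)
#   count_files('one.csv')               # -> 1, string wrapped in a list
#   count_files(['one.csv', 'two.csv'])  # -> 2, list passed through as-is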

#
# YesNo is a custom argparse Action class that provides boolean arguments
# to set boolean option True or False, i.e.
# '--foo' will set option 'foo' to True
# '--nofoo' will set it to False
# Default can be specified by the 'add_argument' caller.
#
# This is an alternative to the 'store_true' and 'store_false' options, which
# necessarily imply a default.
#
# Caveat - since this automatically creates the '--no' spelling of the
# option and relies on that prefix to detect negation, you can't have
# a YesNo argument whose own name starts with '--no'.
# (That would be a --nono)
# Options that are not 'action=YesNo' are not affected by this.
#
# Code inspired by the answer to this stackoverflow question:
# https://stackoverflow.com/questions/9234258/in-python-argparse-is-it-possible-to-have-paired-no-something-something-arg
#
class YesNo(argparse.Action):
   def __init__(self, option_strings, dest, **kwargs):
      assert 0 < len(option_strings) <= 2
      s = [t for t in option_strings if t.startswith('--')][0]
      # s is the long-form option string
      assert s[:2] == '--'
      assert s[2:4] != 'no'
      assert dest == s[2:]

      accepts = option_strings + ['--no'+s[2:]]

      kwargs['nargs'] = 0
      kwargs['type'] = bool

      super(YesNo, self).__init__(accepts, dest, **kwargs)

   def __call__(self, parser, namespace, values, option_string=None):
      if option_string.startswith('--no'):
            setattr(namespace, self.dest, False)
      else:
            setattr(namespace, self.dest, True)
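
# Sketch of YesNo in use (hypothetical option name):
#   p = argparse.ArgumentParser()
#   p.add_argument('--foo', action=YesNo, dest='foo')
#   p.parse_args(['--foo']).foo     # -> True
#   p.parse_args(['--nofoo']).foo   # -> False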


###
### Returns a read-only namespace of all of its arguments
###
### For example:
###
### ns = Nspace(x=1,y=2,z=3)
### ns.x, ns.y, ns.z
### (1, 2, 3)
###
### To create an Nspace from a dict:
### d = {'a': 1, 'b': 2, 'c': 3}
### ns = Nspace(**d)
### ns.a, ns.b, ns.c
### (1, 2, 3)
###
### but note that fields must be specified in the constructor call;
### once created, the namespace is read-only and later assignments
### are silently ignored:
###
class Nspace():
   def __init__(self, **kwargs):
      self.__dict__.update(kwargs)

   def __setattr__(self,k,v):
      pass
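
# The read-only behavior in a nutshell (hypothetical values):
#   ns = Nspace(x=1)
#   ns.x = 99   # silently ignored by __setattr__
#   ns.x        # -> still 1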


###
### get_config - get json config file as dict
###
### If the file can't be read and its name doesn't already end in
### .json, it retries with a .json extension added.  This makes life
### a little more convenient for interactive users.
###
def get_config(fn):
   """Reads JSON config file and checks for errors, returning dict

   Returns an empty dict if any errors are detected.
   If the filename doesn't end in .json and loading it doesn't work,
   it will try again with '.json' tacked on.
   """
   import json

   # Consider variations, if no '.json' extension
   fnames = [fn]
   if not fn.lower().endswith('.json'):
      fnames += [fn+t for t in ('.json','.JSON')]
   for fname in fnames:
      if fname != fn:
            # Warn user if filename doesn't exactly match
            print(f"Trying '{fname}' instead", file=stderr)
      try:
            with open(fname) as f:
               cfg = json.load(f)
               assert isinstance(cfg, dict), "JSON file needs to be a dict"
               return cfg   # File seems to be OK
      except (json.JSONDecodeError, AssertionError, FileNotFoundError) as err:
               print(f"Can't parse json file '{fn}': {err}", file=stderr)
   return {}
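
# For example, given a hypothetical config file 'my_run.json'
# containing {"nrows": 1000, "color": "green"}:
#   get_config('my_run')   # retries as 'my_run.json' -> the dict above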


###
### Knobs class used for global opt settings
###
###
class Knobs():
   def __init__(self, defaults=None):
      self.options = dict(defaults or {})   # copy; don't alias the caller's dict
      self.override = {}
      self.p = self.np = None

   def update(self, **kwargs):
      self.options.update(kwargs)

   # Custom help formatter - takes terminal width into
   # account, obeys newlines and uses textwrap to determine
   # line wrapping.
   class MyHelpFormatter(argparse.HelpFormatter):
      def __init__(self, *args, **kwargs):
            try:
               from shutil import get_terminal_size
               width, _ = get_terminal_size()
               kwargs['width'] = width
               kwargs['max_help_position'] = min(29, max(10,width-60))
            except Exception:
               kwargs['max_help_position'] = 20
            super().__init__( *args, **kwargs)
      def _split_lines(self, text, width):
            from textwrap import wrap

            lines = text.splitlines()
            lines = [wrap(t,
                        width-1,
                        break_long_words=False,
                        break_on_hyphens=False) for t in lines]
            lines = sum(lines, [])  # flatten list
            lines = lines + ['']
            return lines

   # read config files and update options accordingly
   def add_cfgs(self, config_filenames):
      for fn in config_filenames:
            self.update(**get_config(fn))

   def set_parser(self, *args, **kwargs):
      if not self.p:
            self.reset_parser(*args, **kwargs)

   def reset_parser(self, *args, **kwargs):
      assert 'argument_default' not in kwargs, "Don't pass argument_default to Knobs"
      self.p = argparse.ArgumentParser(
            *args,
            formatter_class=self.MyHelpFormatter,
            **kwargs
      )
      # This second parser is to detect what was actually on the command line
      # ignoring any default settings.
      self.np = argparse.ArgumentParser(
            *args,
            argument_default=argparse.SUPPRESS,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
            **kwargs,
      )

   def parse_args(self, argv):
      # Use the 'np' parser to filter out any settings that weren't
      # actually on the command line
      if self.p:
            cli_args = vars(self.p.parse_args(argv))
            only_args = vars(self.np.parse_args(argv))
            self.override = select(cli_args, only_args)

   def add_argument(self, *args, **kwargs):
      assert 'default' not in kwargs, "Don't set argument_default in Knobs.add_argument()"
      self.set_parser()
      # If I use SUPPRESS, I can't set defaults and defaults don't show
      # up in the help messages.  If I don't, then the defaults will
      # overwrite the config file settings.  With both, I use the SUPPRESS
      # version (np) to detect what's actually on the command line and the
      # other (p) for the values.  This has the added benefit that
      # I don't have to figure out what the destination is, as the first
      # call returns that information.
      t = self.np.add_argument(*args, default=argparse.SUPPRESS, **kwargs)
      default = self.options[t.dest]
      self.p.add_argument(*args, default=default, **kwargs)

   # return Nspace of option settings, with cli opts overriding
   # defaults and cfg settings
   def get_opts(self, **kwargs):
      return Nspace(**{**self.options, **self.override, **kwargs})
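
# Minimal sketch of the Knobs workflow (hypothetical option and values):
#   k = Knobs({'nrows': None})
#   k.add_argument('-n', '--nrows', type=int)
#   k.parse_args(['-n', '500'])
#   k.get_opts().nrows   # -> 500, CLI value overriding the default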


# knobs contains all the settings from defaults, config files and
# commandline, while opts is a read-only snapshot of knobs
knobs = Knobs(defaults)
opts = knobs.get_opts()


###
### default_opts is a decorator that allows flexibility
### in specifying parameters with default values.  It avoids
### the problem of having redundant boiler-plate code while
### making it easy to use an options namespace.
###
### When a function is decorated with default_opts,
### it will always have an opts object available even if one
### hasn't been explicitly passed in, and individual options can be
### overridden with keyword arguments that correspond to names in
### the opts object.  This is especially helpful when using this module
### interactively (e.g. in jupyter notebook) so you don't have
### to create an opts object just to call module functions and
### you can set options with keyword args. I.e. instead of
###
### knobs = Knobs(dict(nrows=1000, columns=['start', 'end']))
### get_times('foo.csv', knobs.get_opts())
###
### You can more intuitively do:
###
### get_times('foo.csv', nrows=1000, columns=['start', 'end'])
###
def default_opts(def_opts, name='opts'):
   from inspect import signature

   opts_class = type(def_opts)
   def to_map(o):
      try:
            return vars(o)
      except TypeError:
            return o.copy()

   def decorator(f):
      @wraps(f)
      def wrapper(*args, **kwargs):

            # From the formal parameter list, extract any
            # that have been passed as keyword args
            formal = list(signature(f).parameters.keys())
            actual = [p for p in formal if p not in kwargs]
            assert len(args) <= len(actual)

            # If the opts variable has been passed as a positional
            # argument, remove it; it will be re-added as a keyword arg.
            f_args = list(args)
            if name in actual[:len(f_args)]:
               passed = to_map(f_args.pop(actual.index(name)))
            elif name in kwargs:
               passed = to_map(kwargs.pop(name))
            else:
               passed = {}
            new_opts = {**to_map(def_opts), **passed}

            # actual contains the formal params that were passed
            # as positional, while f_args contains the positional
            # arguments.
            pos_args = {k: v for k,v in zip(actual, f_args)}
            new_opts = {**new_opts, **kwargs, **pos_args}

            # ensure that any kwargs passed in are legal and put 'opts' back in
            f_kwargs = {k:v for k,v in kwargs.items() if k in formal}
            f_kwargs[name] = opts_class(**new_opts)

            # f_kwargs contains only options that are formal parameters
            # f_args contains options that were passed as positional
            # except for opts that was changed to a keyword arg.
            return f(*f_args, **f_kwargs)
      return wrapper
   return decorator
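
# Sketch of default_opts on a hypothetical function:
#   @default_opts(opts)
#   def report(fn, opts):
#      print(fn, opts.nrows)
#   report('foo.csv')             # opts supplied from defaults
#   report('foo.csv', nrows=100)  # keyword arg overrides opts.nrows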

###
### Type used to pass multiple values as a comma separated
### list on the command line, e.g.
### --foo this,that,the_other_thing
###
def comma_separated_list(s):
   return s.split(',')


###
### add common args to the arg parser for knobs, used for args that
### are common to other modules like outliers and combined_hist
###
def common_args(knobs):
      knobs.add_argument('data_files',
            nargs='*',
            help='CSV file(s) with start and stop data',
            metavar='path/filename{,another_file, ...}'
      )
      knobs.add_argument('-c', '--config',
            help="Config files to add",
            type=comma_separated_list,
            metavar='path/cfg.json{,...}'
      )
      knobs.add_argument('--columns',
            help="Start and End columns (comma separated), or just Time column",
            type=comma_separated_list,
            metavar= '"start_column,end_column" or "time_column"',
      )
      knobs.add_argument('-n', '--nrows',
            help='Number of rows to read',
            type=int,
            metavar="<int>",
      )
      knobs.add_argument('--skiprows',
            help='Number of rows to skip at beginning of file',
            type=int,
            metavar="<int>",
      )
      knobs.add_argument('-t', '--title',
            help='Title on graph',
            metavar='"Graph Title"',
      )
      knobs.add_argument('--title_prefix',
            help='Prefix prepended to title',
            metavar='"Beginning of title"',
      )
      knobs.add_argument('--title_suffix',
            help='Suffix appended to title',
            metavar='"End of title"',
      )
      knobs.add_argument('-s', '--subtitle',
            help='Subtitle on graph',
            metavar='"Graph Subtitle"',
      )
      knobs.add_argument('--confidential',
            help="Annotate with 'Intel Confidential'\nDefault: %(default)s",
            action=YesNo,
            dest='confidential',
      )
      knobs.add_argument('-d', '--display',
            help='Display graph\nDefault: %(default)s',
            action=YesNo,
      )
      knobs.add_argument('--save_image',
            help="Save images to files\nDefault: %(default)s",
            action=YesNo,
            dest='save_image',
      )
      knobs.add_argument('-p', '--image_file_prefix',
            help='Add prefix to image filenames',
            metavar='"prefix-for-filename-"',
      )
      knobs.add_argument('--output_path',
            help='Save files to specific directory',
            type=Path,
            metavar='"Path/to/output/"'
      )
      knobs.add_argument('--ymin',
            help='Lower limit on y-axis (time) for sequence graphs',
            type=float,
            metavar="<float>",
      )
      knobs.add_argument('--ymax',
            help='Upper limit on y-axis (time) for sequence graphs',
            type=float,
            metavar="<float>",
      )
      knobs.add_argument('--logy',
            help='\n'.join(["If set, use log scale for y axis, else use linear",
                           "(may not apply to some styles)",
                           "Default: %(default)s",
            ]),
            action=YesNo,
      )
      knobs.add_argument('--data_start',
            help='If set, any text before this line is ignored',
      )
      knobs.add_argument('--error_check',
            help='Perform error checking on input and calculated times',
            action=YesNo,
      )

if __name__ == '__main__':
   from sys import argv

   knobs.set_parser(
      description='Create graphs from time data',
      epilog='Example:\n    python3 '+argv[0]+
               ' test/samples/2k.csv --nrows 1000 -d',
   )

   common_args(knobs)

   knobs.add_argument('--nbins',
      help='Number of bins for histogram',
      type=int,
      metavar="<int>",
   )
   knobs.add_argument('--save_deltas',
      help="Filename to save calculated deltas as csv",
      metavar="filename-for-calculated-times.csv",
   )
   knobs.add_argument('--color',
      help='color for graph\nDefault: %(default)s',
   )
   knobs.add_argument('--hist_pos',
      help='left or right positioning of histogram\nDefault: %(default)s',
      choices=['left', 'right'],
   )
   knobs.add_argument('--maxline',
      help='Add an annotated line at the max measurement\nDefault: %(default)s',
      action=YesNo,
   )
   knobs.add_argument('--grid_style',
      help='Grid lines color',
      choices=['white', 'gray'],
   )
   knobs.add_argument('--summary',
      help='Print summary stats, max, mean, min and number of datums.',
      action=YesNo,
   )
   knobs.add_argument('--summary_only',
      help="Only Print summary stats, don't generate graphs",
      action=YesNo,
   )


   knobs.parse_args(argv[1:])
   knobs.add_cfgs(knobs.get_opts().config)
   opts = knobs.get_opts()

   # This isn't strictly necessary, but it seems to improve
   # performance if you're not displaying any graphs
   # Also, works if there is no DISPLAY
   # Changed from mpl.use to plt.switch_backend so
   # we don't have to worry about careful ordering
   # of imports
   if not opts.display:
      plt.switch_backend('Agg')

###
### Return 'root' of file/pathname, i.e. strip off the path
### and any extensions, turning this:
###     "/foo/bar/readme.txt.md"
### into this:
###     "readme"
###
def get_root_fn(fn):
   ''' Return filename without path or extensions, i.e.
   turn this:
      "/foo/bar/readme.txt.md"
   into this:
      "readme"
   '''
   from os.path import basename

   fname_ext = basename(fn)
   root = fname_ext.split('.')[0]
   return root

###
### Open a data file, either plain text or gzipped.
###
### Returns the result of a call to open or gzip.open,
### both of which are context managers, so it can be
### used in a 'with' statement.
###
@default_opts(opts)
def open_data(fn, opts=opts):
   '''Open data file that is either text or gzipped'''

   import gzip

   if fn.endswith('.gz') or guess_type(fn)[1] == 'gzip':
      f = gzip.open(fn, 'rt')  # need 'rt' else it returns bytes instead of str
   else:
      f = open(fn, 'rt')

   return f
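
# Usage sketch, with hypothetical filenames:
#   with open_data('results.csv') as f:      # plain text
#      header = f.readline()
#   with open_data('results.csv.gz') as f:   # transparently decompressed
#      header = f.readline()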


###
### Generator that yields chunks of a series with data
### cleaned and calculated
###
@default_opts(opts)
def process_chunks(chunks, opts):
   """Read in raw data and calculate times, after
   checking for and eliminating bad data (negative times, &c).

   Returns a Series of times in same units as input timestamps
   (Usually nanoseconds)
   """
   columns = opts.columns
   for chunk in chunks:
      df = chunk
      if not columns:
            columns = list(df.columns[:2])
      # Check for errors and bad data in input file
      # If I convert to_numeric before removing bad data, numbers will
      # end up fp, as any NaN turns the column into fp.  This way, if
      # they are integers it should stay integer, but will still handle
      # floating point input properly
      if opts.error_check:
            for col in columns:
               # coerce unparseable entries to NaN instead of raising
               s = pd.to_numeric(df[col], errors='coerce')
               bad_data = s.isnull() | (s <= 0)
               if bad_data.any():
                  print('Bad/missing data in input:\n',
                        df[bad_data],
                        file=stderr)
                  df = df.loc[~bad_data]
               df[col] = pd.to_numeric(df[col])

      if len(columns) == 2:
            time_series = df[columns[1]] - df[columns[0]]
            if opts.error_check:
               # Check for negative times resulting from calculation
               non_pos = time_series<=0
               if non_pos.any():
                  print('Non-positive times calculated from input:\n',
                        time_series.loc[non_pos], file=stderr)
                  time_series = time_series[~non_pos]
      elif len(columns) == 1:
            time_series = df[columns[0]]
      else:
            raise ValueError("Need 1 or 2 columns of data")

      yield time_series

###
### get_data_start_end - if input is not a csv file, then
### beginning and end of csv data must be marked by opts.data_start
### and opts.data_end.
### get_data_start_end will determine the number of rows to
### skip and how many rows to read so read_csv can read the
### data properly.
###
@default_opts(opts)
def get_data_start_end(fn, opts):
   # search for START and END of data
   with open_data(fn) as f:
      for i,line in enumerate(f):
            if line.startswith(opts.data_start):
               skiprows = i+1  # next line is header
               break
      else:
            raise ValueError(f"Missing {opts.data_start}, or rename to .csv file")

      i = 0           # In case data_start is last line of file
      for i,line in enumerate(f):
            if line.startswith(opts.data_end):
               nrows = i-1     # Previous line was last data row
               break
      else:
            nrows = i
            print(f'No {opts.data_end}, assuming data goes to end of file', file=stderr)

   if nrows <= 0:      # If no header and no data, i-1 == -1
      raise ValueError(f"No data found (nrows == 0)")

   return skiprows, nrows
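
# Sketch of the raw (non-csv) layout the markers describe, with
# hypothetical data; the header row immediately follows data_start:
#   ... free-form log output ...
#   <START_OF_MEASUREMENTS>
#   start,end
#   100,250
#   300,475
#   <END_OF_MEASUREMENTS>
# Here get_data_start_end would return skiprows=2, nrows=2.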

###
### get_clean_deltas - Read in data from e2e raw output or csv, return series
### consisting of time deltas.  If opts.columns contains 1 or 2 column names,
### then these columns are taken as the delta column, or the start and end columns,
### respectively.
### If opts.columns is not set, then the start and end columns
### are taken to be the first 2 columns in the csv file.  If there
### is only one column in the data, then that is taken as the time
### delta column
###
### Negative times or other errors are filtered out with a warning to
### the user.
###
@default_opts(opts)
def get_clean_deltas(filename, opts):
   """Read in raw data and calculate times, after
   checking for and eliminating bad data (negative times, &c).

   Returns a Series of times in the input units (usually nanoseconds).
   """

   if guess_type(filename)[0] != 'text/csv':
      skiprows, nrows = get_data_start_end(filename)
      csv_opts = {'skiprows': skiprows}
      if opts.nrows is not None:
            nrows = min(nrows, opts.nrows)
      csv_opts['nrows'] = nrows
   else:
      csv_opts = select(vars(opts), ['skiprows', 'nrows'])

   # Found an odd problem when a long run that has missing data
   # causes the dtype to default to float instead of int.  This resulted
   # in a loss of precision which caused bad banding in the output graph.
   # By setting the dtype to object, it treats them as strings until
   # I can filter out any NaNs and other bad data, then convert them to int
   csv_opts['dtype'] = object if opts.error_check else int
   csv_opts['chunksize'] = 2**16
   csv_opts['usecols'] = opts.columns or None
   with open_data(filename) as f:
      dfg = pd.read_csv(f, **csv_opts)
      time_series = pd.concat(process_chunks(dfg, opts))  # pass opts through

   time_series.name = get_root_fn(filename)
   time_series.index.name = 'Sequence Number'
   return time_series
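
# Typical interactive use (hypothetical file and column names):
#   deltas = get_clean_deltas('run1.csv', columns=['start', 'end'])
#   deltas.describe()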

###
### Calculate response times from the difference between start and end times.
### Return in dataframe with each column corresponding to a file of raw data
###
@expecting_list
@default_opts(opts)
def get_times(data_files, opts):
   '''Return DataFrame of times in the input units (usually nanoseconds),
   each column corresponding to an input file
   '''
   df_times = pd.DataFrame()

   for fn in data_files:
      time_series = get_clean_deltas(fn, opts)
      df_times = pd.concat([df_times, time_series], axis=1)

   df_times.columns.name = 'File'
   df_times.index.name = 'Sequence Number'
   return df_times

###
### Add 'Intel Confidential' notation to fig
###
def mark_confidential(fig):
   fig.text(0.95, 0.04,
            'Intel Confidential',
            fontsize=12,
            ha='right',
            color='blue',
   )

###
### Humanizing tick numbers
###
# mu == 'μ' == '\N{Greek small letter Mu}'
time_units = {
   10**9: 's',
   10**6: 'ms',
   10**3: 'μs',
   1: 'ns',
   0: 'ns',
}

units = {
   10**9: 'G',
   10**6: 'M',
   10**3: 'K',
   1: '',
   0: '',
}

###
### Change n to human readable scale
### dict 'units' determines units to place
### after the number
###
def scale(pn, units=units, epsilon=.5):
   '''Change n to human readable scale'''
   sign = '-' if pn < 0 else ''
   n = abs(pn)
   if n < epsilon:      # n is non-negative here
      return f'0{units[0]}'
   factors = [i for i in units if i > 0]
   for i in sorted(factors, reverse=True):
      if n >= i:
            scaled = n/i
            return f'{sign}{scaled:,.0f}{units[i]}'
   assert False, f'bad n == {n}'

###
### Scale nanoseconds to human friendly formats
###
def scale_ns(n):
   return scale(n, units=time_units)
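
# For example:
#   scale(3000)              # -> '3K'
#   scale_ns(2_000_000)      # -> '2ms'
#   scale_ns(5_000_000_000)  # -> '5s'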

###
### Scale nanoseconds to human friendly formats
### Term 'decades' borrowed from matplotlib code,
### refers to powers of 10, i.e. lim=(10**2,10**5)
### would be 3 decades.
### minor ticks are sometimes the only ticks available,
### but they need to be filtered a bit or the log scale
### labels get crowded.
###
def log_scale_ns(n, drop_digits='', lim=None):
   '''scale nanoseconds for log axis

drop some tick labels if it's too crowded
'''
   import re, math
   drop = drop_digits
   if lim and not drop:
      decades = math.log(abs(lim[1]-lim[0]), 10)
      decade_drops = {1: '', 2: '79', 4: '4679'}
      drop = '346789'
      for k in sorted(decade_drops):
            if decades < k:
               drop = decade_drops[k]
               break

   x = scale(n, units=time_units)
   if drop and re.match('-?['+drop+']', str(x)):
      return ''
   else:
      return x

###
### Set up ticks and spines
###
@default_opts(opts)
def set_style_ticks_spines(ax, opts):
   # Gray x and y axes
   tick_color = 'gray'
   face_color = '#F0F0F0'
   invisible = '#ffffff00'
   if opts.grid_style == 'gray':
      grid_color = 'gray'
      ax.tick_params(axis='x', colors=grid_color, labelcolor=tick_color)
   else:
      grid_color = 'white'
      ax.tick_params(axis='x', colors=invisible, labelcolor=tick_color)
      ax.xaxis.grid(color=grid_color, linestyle='-')
   ax.set_facecolor(face_color)

   # Hide the left, right and top spines
   for spine in ['right', 'left', 'top', 'bottom']:
      ax.spines[spine].set_visible(False)

   # y-ticks and grid
   ax.set_axisbelow(True)
   ax.yaxis.grid(color=grid_color, linestyle='-')
   ax.tick_params(axis='y',
                  colors=tick_color,
                  labelcolor=tick_color,
                  which='both')
   if ax.yaxis.get_scale() == 'linear':
      ax.tick_params(axis='y', colors=invisible, labelcolor=tick_color, which='both')

   ### custom formatting with commas has been replaced by
   ### scaled units version near end of graph_series.
   ### That location insures we've got a good handle on limits
   ### and the ticklabels are more human friendly

###
### Generate 2 part plot, a plot of values in sequence order, and an
### aligned histogram to the side to indicate the density of points.
###
@default_opts(opts)
def graph_series(time_series, title=opts.title, ylim=None, opts=opts):
   '''Generate 2 part plot, a plot of values in sequence order, and an
   aligned histogram to the side to indicate the density of points.
   '''
   # Make life a little easier for interactive users by adding
   # some flexibility w.r.t. input type.
   if isinstance(time_series, pd.DataFrame):
      for c in time_series.columns:
            fig = graph_series(time_series[c], title=f'{title}: {c}', ylim=ylim, opts=opts)
      return fig

   maxidx = time_series.idxmax() # may be used for notations on graph
   max1 = time_series[maxidx]
   min1 = time_series.min()
   mean1 = time_series.mean()
   std1 = time_series.std()
   jitter = max1 - min1

   # When the number of points gets too large, matplotlib ends up
   # taking a lot of time and memory, so this will sample
   # a portion of the points, preserving maxes and mins
   # It shouldn't affect the graph too much; the numbers on the
   # histogram will be off but the shape will be similar.
   # If necessary, we could use the original series for the histogram,
   # but trying this for now.
   numdots = len(time_series)
   if numdots >= 1000000:
      from random import sample
      chosen = []
      chunksize = numdots//25000
      samplesize = 10
      for i in range(0, len(time_series), chunksize):
            t = time_series.iloc[i:i+chunksize]
            chunk = set(t.index)
            s = {t.idxmax()}
            s.add(t.idxmin())
            if len(chunk) > 2*samplesize:
               s = s.union(sample(tuple(chunk), samplesize))
            chosen += sorted(s)
      time_series = time_series[chosen]

   ###
   ### Plot vlines, trimming out the ones near the mean for large
   ### data sets.
   ###
   def plot_vlines(ax):
      if len(time_series) > 10000:
            lower_lim = mean1-std1/3
            upper_lim = mean1+std1/2
            lines_below = time_series < lower_lim
            lines_above = time_series > upper_lim
            trim_idx = lines_below | lines_above
            ts = time_series[trim_idx]
      else:
            ts = time_series

      ax.vlines(ts.index,
                  mean1,
                  ts,
                  alpha=0.5,
                  color='lightblue',
                  linestyle='solid')

   from matplotlib.gridspec import GridSpec

   fig = plt.figure(figsize=(12,6), dpi=100)
   gs = GridSpec(1,6)
   if opts.hist_pos == 'right':
      seq = fig.add_subplot(gs[0:-1])
      hist = fig.add_subplot(gs[-1], sharey=seq)
      seq.yaxis.tick_right()
      direction = lambda x: x
   else:
      seq = fig.add_subplot(gs[1:])
      hist = fig.add_subplot(gs[0], sharey=seq)
      hist.yaxis.tick_right()
      direction = reversed

   nbins = opts.nbins
   if nbins is None:
      # nbins not set by user
      # By trial and error, this formula for nbins is about right for now
      nbins = int((max1 - min1)//25)   # hist needs an integer bin count
      nbins = max(50, nbins)
      nbins = min(2000, nbins)

   time_series.plot.hist(bins=nbins,
                        ax=hist,
                        logx=True,
                        color=opts.color,
                        orientation='horizontal')
   hist.set_xlim(direction(hist.get_xlim()))
   hist.set(ylabel=f'Time in {opts.units}', xlabel='Count (log)')
   annotation_position = {'xy': (0.25, .03), 'xycoords': 'figure fraction'}
   seq.set(xlabel='Sample Number')
   if opts.logy:
      hist.semilogy()
      hist.set_ylabel(hist.get_ylabel()+' (log)')

   if opts.maxline:
      seq.axhline(y=max1, xmin=0, xmax=1, color='red', alpha=.5)
      last = time_series.index[-1]
      xpos = min(maxidx, last*.96) # Don't stick too far out to right
      seq.annotate(f'Max = {max1:,.0f}',
            xy=(xpos, max1),
            xycoords='data',
            xytext=(-20, 2),
            textcoords="offset points",
            color='#A00000',
            alpha=.8)

   plot_vlines(seq)

   markersize = 1 if len(time_series) > 100000 else 2
   time_series.plot(ax=seq,
                     linestyle='',
                     marker='o',
                     markersize=markersize,
                     color=opts.color)
   if ylim:
      seq.set_ylim(ylim)

   annotation = ' | '.join([
      f"Min = {min1:,.0f}",
      f"Mean = {mean1:,.1f}",
      f"Max = {max1:,.0f}",
      f"Jitter = {jitter:,.0f}"])

   bbox = {'facecolor': 'lightgray', 'alpha': 0.3, 'linewidth': 0.0}
   font = {'size': 11, 'family': 'monospace'}

   hist.annotate(annotation, **annotation_position, bbox=bbox, **font)

   fig.suptitle(title, fontsize=20)
   if opts.subtitle:
      fig.text(0.5, 0.91, opts.subtitle, fontsize=12, ha='center')

   set_style_ticks_spines(hist, opts)
   set_style_ticks_spines(seq, opts)

   # add the confidential warning if needed.
   if opts.confidential:
      mark_confidential(fig)

   # human friendly ticklabels - numbers and scaled units
   # instead of lots of digits or exponential notation
   # With this we don't need the 'with_commas' formatter
   horizontal_scale = lambda t, p: scale(t)
   vertical_scale = lambda t, p: scale_ns(t)
   vertical_log_scale = lambda t, p: log_scale_ns(t, lim=hist.get_ylim())

   hist.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(vertical_scale))
   hist.xaxis.set_major_formatter(mpl.ticker.FuncFormatter(horizontal_scale))
   hist.yaxis.set_minor_formatter(mpl.ticker.FuncFormatter(vertical_log_scale))
   seq.xaxis.set_major_formatter(mpl.ticker.FuncFormatter(horizontal_scale))

   # tight_layout affects many things, in particular if I put it
   # after subplots_adjust, it's messed up.
   fig.tight_layout()
   fig.subplots_adjust(top=0.88, bottom=0.15)

   return fig
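
# Interactive sketch (hypothetical file):
#   ts = get_clean_deltas('run1.csv')
#   fig = graph_series(ts, title='Run 1', nbins=100)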

if __name__ == '__main__':
   if not opts.data_files:
      raise SystemExit("No input files specified")

   for filename in opts.data_files:
      durations = get_clean_deltas(filename, opts)

      if opts.summary or opts.summary_only:
            maxidx = durations.idxmax() # may be used for notations on graph
            max1 = durations[maxidx]
            min1 = durations.min()
            mean1 = durations.mean()
            print(f'{filename}: Max: {max1} Mean: {mean1} Min: {min1} N: {len(durations)}')

      save_deltas = opts.save_deltas
      outpath = Path(opts.output_path)
      if save_deltas:
            fn_prefix = get_root_fn(filename)   # strip path and extensions
            save_file = outpath/'-'.join([fn_prefix,save_deltas])
            durations.to_csv(save_file, index=False)

      if opts.summary_only:
            continue

      # Plot the figure from durations
      ylim = opts.ymin, opts.ymax
      name = get_root_fn(filename)
      title = opts.title or name
      title = opts.title_prefix + title + opts.title_suffix
      fig = graph_series(durations, title, ylim, opts)

      # Need to save before showing, otherwise interactive
      # manipulation affects the saved file
      if opts.save_image:
            prefix = opts.image_file_prefix or ''
            fig.set_tight_layout(True)
            fig.savefig(outpath/(prefix+name+'.png'), bbox_inches="tight")

      if opts.display:
            plt.show()
            plt.close()
            del fig
      del durations

