import requests
try:
    from io import BytesIO
except (ImportError, SystemError):
    from cStringIO import StringIO as BytesIO
import os
import logging
import numpy as np
from ..utils import *
from ...timeseries_selectors import collapse_timeseries
from .... import settings


class InvalidDatafileContent(Exception):
    """Raised by the loaders in this module when a datafile cannot be parsed.

    Used for malformed ASC headers and for NetCDF files whose coordinate or
    ``z`` variables cannot be read.
    """


def load_asc_grid(fh, **meta):
    """Load a single grid from an ASCII (``.asc``) raster file.

    The file must start with six header lines, in exactly this order and
    spelling: ``ncols``, ``nrows``, ``xllcorner``, ``yllcorner``,
    ``cellsize`` and ``NODATA_value``.  Every following non-blank line is
    read as one grid row of whitespace-separated numbers.

    :param fh: binary file-like object (header lines are decoded as ASCII).
    :param meta: arbitrary metadata, returned unchanged.  ``meta['source']``
        (if present) is used as the error message for an unparsable header
        line.
    :returns: ``(x, y, data, meta)``.  ``x`` holds ``ncols`` and ``y`` holds
        ``nrows`` coordinates, starting at ``xllcorner`` / ``yllcorner`` and
        spaced by ``cellsize``.  ``data`` is a masked array of shape
        ``(nrows, ncols)`` with ``NODATA_value`` cells masked and the row
        order reversed relative to the file (``np.flipud``).
    :raises InvalidDatafileContent: if a header line does not consist of
        exactly two tokens, or the header names are not the expected ones.
    """
    HEADERS = [
        ('ncols', int),
        ('nrows', int),
        ('xllcorner', float),
        ('yllcorner', float),
        ('cellsize', float),
        ('NODATA_value', float)
    ]
    headers = dict()
    for header_name, header_type in HEADERS:
        try:
            file_header_name, value = fh.readline().decode('ascii').strip().split()
        except ValueError:
            # wrong number of tokens, or bytes that are not ASCII
            raise InvalidDatafileContent(meta.get('source', None))

        if header_name != file_header_name:
            raise InvalidDatafileContent('invalid ASC header ({} != {})'.format(header_name, file_header_name))
        headers[header_name] = header_type(value)

    data = np.zeros((headers['nrows'], headers['ncols']), dtype=float)
    row_index = 0
    for line in (l.strip() for l in fh):
        if not line:
            continue
        data[row_index, :] = [float(v) for v in line.split()]
        row_index += 1

    data = np.flipud(np.ma.masked_equal(data, headers['NODATA_value']))

    # Build the axes from an integer range.  np.arange(start, stop, step)
    # with a float step may return one element too many because of rounding
    # (e.g. start=0.1, step=0.1, n=3), which would make the axes disagree
    # with data.shape.
    x = headers['xllcorner'] + np.arange(headers['ncols']) * headers['cellsize']
    y = headers['yllcorner'] + np.arange(headers['nrows']) * headers['cellsize']

    return x, y, data, meta


def load_nc_gridstack(D, tslice=None, min_density=None, min_coverage=None, **meta):
    """Load a stack of grids (one per timestep) from an opened NetCDF dataset.

    :param D: dataset object exposing ``D.variables`` with the entries
        ``'x'``, ``'y'``, ``'time'``, ``'z'``, ``'density'`` and ``'sid'``.
        The ``'x'`` and ``'y'`` variables must carry a ``resolution``
        attribute.
    :param tslice: slice applied along the time axis.  Defaults to the 15
        most recent timesteps.
    :param min_density: if given, timesteps whose ``density`` value is below
        this threshold are dropped.
    :param min_coverage: if given, timesteps whose fraction of unmasked
        ``z`` cells is below this threshold are dropped.
    :param meta: extra metadata; returned with ``'density'`` and ``'sid'``
        added (both filtered like the time axis).
    :returns: ``(x, y, t, data, meta)`` where ``x`` and ``y`` are the
        coordinate variables shifted by minus half their ``resolution``,
        and ``data`` has the time axis first.
    :raises InvalidDatafileContent: if x, y or time cannot be read, or if
        reading ``z`` fails with ``RuntimeError`` three times in a row.
    """
    log = logging.getLogger(__name__)

    if tslice is None:
        # limit to 15 recent timesteps to prevent too many values
        tslice = slice(-15, None)

    try:
        x = D.variables['x'][:] - D.variables['x'].resolution / 2.
        y = D.variables['y'][:] - D.variables['y'].resolution / 2.
        t = D.variables['time'][tslice]
    except AttributeError:
        raise InvalidDatafileContent('could not read x, y, t from datafile')

    # try to load z data several times (unreliable loading of large number of values)
    for _attempt in range(3):
        try:
            data = D.variables['z'][tslice, :, :]
            break
        except RuntimeError as e:
            try:
                vardesc = str(D.variables['z'])
            except Exception:
                vardesc = ''
            log.error(str(e)+'\n'+str(meta)+'\n'+vardesc)
            continue
    else:
        # The number of placeholders must match the number of arguments,
        # otherwise str.format raises IndexError and hides the real problem.
        raise InvalidDatafileContent(
            'could not read z data from datafile: '
            'contents may be too large {} (slice={})'.format(D.variables['z'].shape, tslice))

    meta['density'] = D.variables['density'][tslice]
    meta['sid'] = D.variables['sid'][tslice]

    # True for every timestep that is kept
    tmask = np.ones(t.size, dtype=bool)

    if min_density is not None:
        old_count = tmask.sum()
        tmask[meta['density'] < min_density] = False
        log.debug(
            'filtered {} timesteps by point density'.format(old_count - tmask.sum()))

    if min_coverage is not None:
        old_count = tmask.sum()
        # getmaskarray always yields a full boolean array, also when the data
        # carries no mask at all (nomask) or is a plain ndarray.
        masked_cells = np.ma.getmaskarray(data).sum(axis=(1, 2)).astype(float)
        coverages = 1 - masked_cells / np.prod(data.shape[1:])
        tmask[coverages < min_coverage] = False
        log.debug(
            'filtered {} timesteps by data coverage'.format(old_count - tmask.sum()))

    ind = np.arange(t.size)[tmask]

    t = t[ind]
    data = data[ind, :, :]
    meta['density'] = meta['density'][ind]
    meta['sid'] = meta['sid'][ind]

    return x, y, t, data, meta


def load_nc_grid(D, ti='recent', sid=None, **kw):
    """Load a single grid from an opened NetCDF dataset.

    The grid stack is read with ``load_nc_gridstack`` and then reduced to
    one grid, either by picking a timestep or by collapsing the time axis.

    :param D: dataset object, passed on to ``load_nc_gridstack``.
    :param ti: either an integer index into the loaded time axis, or a
        string handed to ``collapse_timeseries`` as the collapse method.
    :param sid: if given, overrides ``ti`` with the index of the first
        loaded timestep whose ``sid`` equals this value.
    :param kw: forwarded to ``load_nc_gridstack`` (``tslice``,
        ``min_density``, ``min_coverage`` and extra metadata).
    :returns: ``(x, y, data, meta)``.  When a timestep is picked, ``meta``
        gets a ``'time'`` entry and every per-timestep array in it is
        reduced to that timestep's value.  When the stack is collapsed,
        per-timestep arrays are removed from ``meta``.
    :raises ValueError: if ``sid`` is not present in the loaded stack, or
        ``ti`` is neither an integer nor a string.
    """
    x, y, t, data, meta = load_nc_gridstack(D, **kw)

    newmeta = meta.copy()

    if sid is not None:
        # meta['sid'] is a NumPy array, which has no list-style .index()
        matches = np.flatnonzero(np.asarray(meta['sid']) == sid)
        if matches.size == 0:
            raise ValueError('sid {!r} not found in datafile'.format(sid))
        ti = int(matches[0])

    # metadata entries that hold one value per timestep
    per_timestep = [k for k, v in meta.items()
                    if isinstance(v, np.ndarray) and v.shape == t.shape]

    # also accept NumPy integers, e.g. an index produced by np.argmax
    if isinstance(ti, (int, np.integer)):
        data = data[ti, :, :]
        newmeta['time'] = t[ti]
        for k in per_timestep:
            newmeta[k] = meta[k][ti]
    elif isinstance(ti, str):
        data = collapse_timeseries(data, ti)
        for k in per_timestep:
            del newmeta[k]
    else:
        raise ValueError('unknown time value {}'.format(ti))

    return x, y, data, newmeta


def load_npz_grid(fh, **meta):
    """Load a single grid from a NumPy ``.npz`` archive.

    The archive must contain the entries ``x``, ``y``, ``Z`` and ``meta``;
    ``meta`` is a zero-dimensional array wrapping the metadata object.

    :param fh: file name or binary file-like object accepted by ``np.load``.
    :param meta: accepted for interface compatibility with the other
        loaders; not used, the metadata stored in the archive is returned.
    :returns: ``(x, y, Z, meta)`` with non-finite values in ``Z`` masked.
    """
    # The stored metadata is an object array, which np.load only decodes
    # with allow_pickle=True (the default became False in NumPy 1.16.3).
    # SECURITY: unpickling can execute arbitrary code, so only load archives
    # from trusted locations.
    with np.load(fh, allow_pickle=True) as data:
        # all entries are read inside the block, before the archive closes
        Z = np.ma.masked_invalid(data['Z'])
        return data['x'], data['y'], Z, data['meta'][()]


def load_npz_gridstack(fh, **meta):
    """Load a stack of grids from a NumPy ``.npz`` archive.

    The archive must contain the entries ``x``, ``y``, ``t``, ``Z`` and
    ``meta``; ``meta`` is a zero-dimensional array wrapping the metadata
    object.

    :param fh: file name or binary file-like object accepted by ``np.load``.
    :param meta: accepted for interface compatibility with the other
        loaders; not used, the metadata stored in the archive is returned.
    :returns: ``(x, y, t, Z, meta)`` with non-finite values in ``Z`` masked.
    """
    # The stored metadata is an object array, which np.load only decodes
    # with allow_pickle=True (the default became False in NumPy 1.16.3).
    # SECURITY: unpickling can execute arbitrary code, so only load archives
    # from trusted locations.
    with np.load(fh, allow_pickle=True) as data:
        # all entries are read inside the block, before the archive closes
        Z = np.ma.masked_invalid(data['Z'])
        return data['x'], data['y'], data['t'], Z, data['meta'][()]


# Default column layout of XYZ point files.  Kept immutable so the default
# cannot be altered between calls.
_DEFAULT_XYZ_COLUMNS = ('x', 'y', 'z', 't0', 't1')


def load_xyz_points(fh, columns=_DEFAULT_XYZ_COLUMNS, delimiter=r'\s+', **meta):
    """Load point data from a headerless, whitespace-separated text file.

    :param fh: file handle passed on to ``PointDataset.load``.
    :param columns: column names to assign; defaults to
        ``['x', 'y', 'z', 't0', 't1']``.
    :param delimiter: column separator pattern; defaults to one or more
        whitespace characters.
    :param meta: extra keyword arguments forwarded to ``PointDataset.load``.
    :returns: whatever ``PointDataset.load`` returns.
    """
    from ...pointdata import PointDataset
    if columns is _DEFAULT_XYZ_COLUMNS:
        # hand out a fresh list per call, as callers of the loader received
        # a list before
        columns = list(_DEFAULT_XYZ_COLUMNS)
    return PointDataset.load(fh, delimiter=delimiter, columns=columns, **meta)


def load_csv_points(fh, **meta):
    """Load point data from a comma-separated file.

    Forwards ``fh`` and all ``meta`` keyword arguments to
    ``PointDataset.load`` with the delimiter fixed to ``','``.
    """
    from ...pointdata import PointDataset as _PointDataset
    read_points = _PointDataset.load
    return read_points(fh, delimiter=',', **meta)


# Loader registries: file type (upper-case extension) -> loader function,
# one registry per kind of data.
POINT_FTYPES = {
    'XYZ': load_xyz_points,  # space separated, no headers: assume [x y z t0 t1]
    'CSV': load_csv_points,  # comma separated with headers
}


GRID_FTYPES = {
    'ASC': load_asc_grid,
    'NC': load_nc_grid,
    'NPZ': load_npz_grid,
}

GRIDSTACK_FTYPES = {
    'NC': load_nc_gridstack,
    'NPZ': load_npz_gridstack,
}


def as_readable(f, ftype=None, meta=None):
    """Turn a location or buffer into an object the loaders can read from.

    :param f: for ``ftype == 'NC'`` a location handed to
        ``netCDF4.Dataset``; otherwise an ``http://`` / ``https://`` URL, a
        path to an existing file, or an object with ``read`` and
        ``readline`` attributes.
    :param ftype: file type; only ``'NC'`` is treated specially.
    :param meta: optional dict; when a cached NetCDF copy is used its path
        is stored under ``meta['cached_location']``.
    :returns: an opened ``netCDF4.Dataset`` for NC files; a ``BytesIO``
        holding the downloaded content for URLs; a file opened in binary
        mode for local paths; ``f`` itself if it already is a buffer.
    :raises ValueError: if ``f`` is a string that is neither an http(s) URL
        nor an existing file.
    :raises TypeError: if ``f`` is not a string and lacks ``read`` or
        ``readline``.
    :raises IOError: re-raised (after logging) if the NetCDF dataset cannot
        be opened.
    """
    log = logging.getLogger(__name__)

    if ftype == 'NC':
        # try to read from cache: a file with the same base name inside
        # settings.nc_cachedir takes precedence over the given location
        if settings.nc_cachedir is not None:
            cachepath = os.path.join(settings.nc_cachedir, os.path.split(f)[1])
            if os.path.isfile(cachepath):
                f = cachepath
                if meta is not None:
                    meta['cached_location'] = cachepath
                log.info('loading cached {}'.format(f))
        # open nc file
        import netCDF4
        try:
            return netCDF4.Dataset(f, 'r')
        except IOError:
            log.error('could not read datafile {!r}; location may be unavailable'.format(f))
            raise

    if isinstance(f, str):
        # Match the full scheme: a bare 'http' prefix would also catch local
        # files such as 'http_grid.asc' and send them to requests.
        if f.startswith(('http://', 'https://')):
            resp = requests.get(f)
            resp.raise_for_status()
            return BytesIO(resp.content)
        if os.path.isfile(f):
            return open(f, 'rb')
        raise ValueError('not an existing file or http(s) URL: {!r}'.format(f))

    # anything else must already behave like a readable buffer
    try:
        f.read, f.readline
    except AttributeError:
        raise TypeError('not a buffer')
    return f


def _load(f, ftype, datatype, **meta):
    """Resolve the file type of ``f``, open it if needed and run its loader.

    :param f: location string, or an already opened object that is passed
        to the loader as is.
    :param ftype: file type key such as ``'ASC'``; when ``None`` it is taken
        from the extension of ``f`` (or of ``meta['source']`` when ``f`` is
        not a string), and when still empty ``guess_ftype`` is consulted.
    :param datatype: one of ``'grid'``, ``'gridstack'`` or ``'points'``;
        selects the loader registry.
    :param meta: keyword arguments forwarded to the loader.
    :returns: whatever the selected loader returns.
    :raises KeyError: if ``datatype`` or the resolved ``ftype`` has no
        registered loader.
    """
    log = logging.getLogger(__name__)
    log.info('loading {} as ftype={}'.format(f, ftype or '<undefined>'))

    given_as_string = isinstance(f, str)
    fname = f if given_as_string else meta.get('source', None)

    # first choice: the extension of the file name, if there is a name
    if ftype is None and fname is not None:
        extension = os.path.splitext(fname)[1]
        ftype = extension.lstrip('.').upper()

    # no type given and no usable extension: inspect the input instead
    if not ftype:
        ftype = guess_ftype(f, datatype=datatype)

    if given_as_string:
        f = as_readable(f, ftype=ftype, meta=meta)

    registries = {
        'grid': GRID_FTYPES,
        'gridstack': GRIDSTACK_FTYPES,
        'points': POINT_FTYPES,
    }
    loader = registries[datatype][ftype]

    return loader(f, **meta)


def load_griddata(f, ftype=None, **meta):
    """Load ``f`` as a single grid.

    Delegates to ``_load`` with the datatype fixed to ``'grid'``; ``ftype``
    and ``meta`` are passed through unchanged.
    """
    return _load(f, ftype, datatype='grid', **meta)


def load_gridstackdata(f, ftype=None, **meta):
    """Load ``f`` as a stack of grids.

    Delegates to ``_load`` with the datatype fixed to ``'gridstack'``;
    ``ftype`` and ``meta`` are passed through unchanged.
    """
    return _load(f, ftype, datatype='gridstack', **meta)


def load_pointdata(f, ftype=None, **meta):
    """Load ``f`` as point data.

    Delegates to ``_load`` with the datatype fixed to ``'points'``;
    ``ftype`` and ``meta`` are passed through unchanged.
    """
    return _load(f, ftype, datatype='points', **meta)