"""
load data from various sources:
    - SVNRepository
        An SVN repository server whose index is scanned for data file URLs, which are then downloaded
    - Directory
        Directory on the computer
    - ZipArchive
        Zip archive as a file or stream
    - NCCatalog
        Catalog of netCDF4 references, specify an attribute for the listing
"""
from __future__ import print_function
import os
import zipfile
import requests
try:
    from io import BytesIO
except (ImportError, SystemError):
    from cStringIO import StringIO as BytesIO
import xml.etree.ElementTree as ET
from .reading import load_griddata, load_gridstackdata, load_pointdata
from ..griddata import GridDataset, GridTimeStackDataset, join_grids
from ... import settings
import logging


# Public names of this module (what ``from <module> import *`` exports).
__all__ = ['Source', 'SVNRepository', 'Directory', 'ZipArchive', 'NCCatalog', 'DataRecord', 'DataRecordList']


class Source(object):
    """
    base class for data sources
    it defines an iterable from which to load data
    a specific loader function can be specified, which is passed to DataRecord
    alternatively, a datatype can be specified, which has predefined loader functions:

        grid:         load as (x, y, Z, meta) data
        gridstack:    load as (x, y, t, Z, meta) data
        points:       load as PointDataset object
        gridobj:      load as GridDataset object
        gridstackobj: load as GridTimeStackDataset object

    additional keyword arguments are passed to the DataRecord objects
    iteration over DataRecord objects can be done as follows:

    >>> for record in Source(...).iter(**opts):
    ...     dataset = record.load()

    where opts are passed to the DataRecord object and overwrite the loadopts defined in the Source object
    A DataRecordList can be obtained using:

    >>> records = Source(...).list()

    See DataRecordList for more information

    Subclasses must implement generate_items(**kw), a generator of DataRecord objects
    (normally created through make_item).
    """

    # maps a datatype name to the loader function used for it
    DATA_TYPES = dict(
        grid=load_griddata,
        gridstack=load_gridstackdata,
        points=load_pointdata,
        gridobj=GridDataset.load,
        gridstackobj=GridTimeStackDataset.load
    )
    # datatype used by make_item when no loader was configured
    DEFAULT_DATA_TYPE = 'gridobj'

    def __init__(self, ref, location=None, loader=None, datatype=None, **loadopts):
        """
        :param ref: reference to the source (path, url, stream, ...), stored as self.reference
        :param location: descriptive location of the source; falls back to ref when empty
        :param loader: loader function handed to the data records
        :param datatype: key of DATA_TYPES, only used when no loader is given
        :param loadopts: default options passed on to every data record
        :raises KeyError: if datatype is not a key of DATA_TYPES
        """
        self.reference = ref
        if loader is None and datatype is not None:
            loader = self.DATA_TYPES[datatype]
        self.loader = loader
        self.location = location or ref
        self.loadopts = loadopts

    def iter(self, maxcount=None, **kw):
        """
        iterate over the data records of this source

        :param maxcount: maximum number of records to yield; None or 0 means no limit
        :param kw: passed to generate_items
        """
        for count, item in enumerate(self.generate_items(**kw)):
            if maxcount and count >= maxcount:
                # stop pulling from the generator once the limit is reached;
                # producing an item may be expensive (e.g. a download) and
                # the result would be thrown away anyway
                break
            yield item

    def make_item(self, ref, location=None, **kw):
        """
        create a data record

        the record options are the loadopts of this source, overwritten by kw
        the loader is the configured loader, or else the one for DEFAULT_DATA_TYPE
        """
        opts = self.loadopts.copy()
        opts.update(kw)
        logging.getLogger(__name__).debug('data record: %s', ref)
        return DataRecord(ref,
                          self.loader or self.DATA_TYPES[self.DEFAULT_DATA_TYPE],
                          location=location,
                          **opts)

    def generate_items(self, **kw):
        """generate the data records of this source; to be implemented by subclasses"""
        raise NotImplementedError('generate_items')

    def list(self, **kw):
        """collect the records produced by iter(**kw) in a DataRecordList"""
        return DataRecordList(self.iter(**kw))

    def __iter__(self):
        return self.iter()

    def __repr__(self):
        return '<{} {!r}>'.format(self.__class__.__name__, self.location)


class Directory(Source):

    """
    Source that produces one data record per entry of a directory

    Using a specific loader function fn with additional options:
    >>> d = Directory(location, loader=fn, **opts)
    >>> recordlist = d.list()

    Using one of the predefined datatypes:
    >>> d = Directory(location, datatype='gridobj')
    >>> recordlist = d.list()

    Looping over all records:
    >>> for record in Directory(location).iter():
    ...     dataset = record.load()
    """

    def generate_items(self, **kw):
        """yield a data record for every name returned by os.listdir; kw goes to make_item"""
        folder = self.reference
        for entry in os.listdir(folder):
            path = os.path.join(folder, entry)
            yield self.make_item(path, **kw)


class ZipArchive(Source):

    """
    Source for loading files from a zip archive

    The reference is handed to zipfile.ZipFile, so it can be a path or a stream.
    See Directory for examples
    """

    def generate_items(self, **kw):
        """
        yield one data record per name in the archive

        The record reference is the opened archive member, its location is
        '<archive location>/<member name>'. kw is passed to make_item.
        """
        # The archive is deliberately left open: each record holds an opened
        # member handle that is only read later, when the record is loaded.
        archive = zipfile.ZipFile(self.reference, 'r')

        # When the archive is a stream and no location was given, the source
        # location is the stream itself and cannot be joined with a name;
        # fall back to the bare member name in that case.
        try:
            prefix = self.location + '/'
        except TypeError:
            prefix = ''

        for name in archive.namelist():
            yield self.make_item(archive.open(name), location=prefix + name, **kw)


class NCCatalog(Source):

    """
    Source for loading datasets from a netCDF4 catalog file

    The catalog source requires an attribute from which to retrieve locations
    >>> c = NCCatalog(path_or_url, attr='urlPath')
    >>> for record in c.iter(xll=500000, yll=5800000, w=5000, h=5000):
    ...     data = record.load()
    """

    DEFAULT_DATA_TYPE = 'gridstackobj'

    def __init__(self, loc=None, attr='urlPath', **kw):
        """
        :param loc: path or url of the catalog; defaults to settings.catalog_path
        :param attr: name of the catalog variable that holds the record references
        :param kw: passed to Source
        """
        if loc is None:
            loc = settings.catalog_path

        self.attr = attr
        super(NCCatalog, self).__init__(loc, **kw)

    def generate_items(self, location=None,
                       xll=None, yll=None, w=None, h=None,
                       t=None, sid=None, min_density=None, min_coverage=None, **kw):
        """
        yield a data record for every catalog row that passes the filters

        :param location: location passed to the data records
        :param xll: if given, skip rows whose x coverage lies outside [xll, xll+w]
        :param yll: if given, skip rows whose y coverage lies outside [yll, yll+h]
        :param w: window size in x; must be given together with xll
        :param h: window size in y; must be given together with yll
        :param t: if given, skip rows whose 'time' values do not contain t; matching
            records get the options ti (index of t in the row's times) and time (t)
        :param sid: passed to the data records
        :param min_density: passed to the data records
        :param min_coverage: passed to the data records
        :param kw: additional options passed to the data records
        """
        import netCDF4
        catalog = netCDF4.Dataset(self.location, 'r')
        try:
            # columns 0 and 1 are presumably the (min, max) extent per row -- TODO confirm
            xcov = catalog.variables['projectionCoverage_x'][:]
            ycov = catalog.variables['projectionCoverage_y'][:]

            for i, row in enumerate(catalog.variables[self.attr][:]):
                if xll is not None and (xcov[i, 1] < xll or xcov[i, 0] > xll+w):
                    continue
                if yll is not None and (ycov[i, 1] < yll or ycov[i, 0] > yll+h):
                    continue
                if t is not None:
                    time = list(catalog.variables['time'][i])
                    if t not in time:
                        continue
                    kw.update(
                        ti=time.index(t),
                        time=t)

                # the row is a sequence of byte characters; join them into one text reference
                ref = str(b''.join(row).decode('utf-8'))
                yield self.make_item(ref, location=location, min_density=min_density,
                                     min_coverage=min_coverage, sid=sid, **kw)
        finally:
            # the records only carry strings, so the catalog file can be released
            # as soon as iteration ends (or the generator is closed early)
            catalog.close()


class SVNRepository(Source):

    """
    Source for loading data from an SVN repository

    Can load data from a repository containing files
    Zip files on the repository are also scanned for data files

    >>> s = SVNRepository(url, username=None, password=None, ...)
    """

    # authentication passed to requests; stays None unless both username and password are given
    AUTH = None

    def __init__(self, url, username=None, password=None, **kwargs):
        """
        :param url: url of the repository directory to scan
        :param username: user name for HTTP basic authentication
        :param password: password for HTTP basic authentication
        :param kwargs: passed to Source
        """
        if username is not None and password is not None:
            self.AUTH = requests.auth.HTTPBasicAuth(username, password)
        super(SVNRepository, self).__init__(url, **kwargs)

    def generate_items(self, **kw):
        """
        download every file found below the repository url and yield data records

        A file whose url ends in '.zip' is opened as a ZipArchive and yields one
        record per archive member; any other file yields a single record holding
        the downloaded content as an in-memory stream.

        :param kw: options passed to the data records
        :raises requests.HTTPError: if a request returns an error status
        """
        for file_url in self._search_svn_dir(self.location):
            resp = requests.get(file_url, stream=True, auth=self.AUTH)
            resp.raise_for_status()
            f = BytesIO(resp.content)

            if os.path.splitext(file_url)[1] == '.zip':
                # forward the load options of this source and the per-iteration
                # options, so that records from archives are configured the same
                # way as records from plain files
                archive = ZipArchive(f, location=file_url, loader=self.loader, **self.loadopts)
                for item in archive.generate_items(**kw):
                    yield item
            else:
                yield self.make_item(f, location=file_url, **kw)

    def _search_svn_dir(self, url):
        """
        recursively yield the urls of all files listed below url

        The response is parsed as XML: every 'index' child of the root is scanned,
        'dir' entries are descended into and 'file' entries are yielded, both
        resolved through their 'href' attribute.
        """
        resp = requests.get(url, auth=self.AUTH)
        resp.raise_for_status()
        root = ET.fromstring(resp.text)
        for ind in root.findall('index'):
            for e in ind:
                if e.tag == 'dir':
                    for item in self._search_svn_dir(self.join_url(url, e.attrib['href'])):
                        yield item
                elif e.tag == 'file':
                    yield self.join_url(url, e.attrib['href'])

    @staticmethod
    def join_url(*args):
        """
        join url parts with single slashes

        Slashes are stripped where two parts meet; leading slashes of the first
        part and trailing slashes of the last part are kept.
        """
        items = []
        for i, item in enumerate(args):
            if i == 0:
                item = item.rstrip('/')
            elif i + 1 == len(args):
                item = item.lstrip('/')
            else:
                item = item.strip('/')
            items.append(item)
        return '/'.join(items)


class DataRecord(object):

    """
    a single piece of data that can be loaded as a dataset on demand
    it holds a reference (location or other object) and the loader to apply to it

    >>> r = DataRecord(location_or_ref, loader)
    >>> dataset = r.load()
    """

    def __init__(self, ref, loader, location=None, **opts):
        """
        :param ref: reference handed to the loader
        :param loader: function called as loader(ref, **options)
        :param location: descriptive location; defaults to ref when ref is a str
        :param opts: default options for the loader
        """
        self.reference = ref
        self.loader = loader
        self.opts = opts
        use_ref = location is None and isinstance(ref, str)
        self.location = ref if use_ref else location

    def load(self, loader=None, datatype=None, **kwargs):
        """
        load the record and return whatever the loader returns

        :param loader: loader to use instead of the one stored on the record
        :param datatype: key of Source.DATA_TYPES, used when no loader is given
        :param kwargs: options that overwrite the options stored on the record
        """
        chosen = loader
        if chosen is None and datatype is not None:
            chosen = Source.DATA_TYPES[datatype]
        load_fn = self.loader if chosen is None else chosen

        # stored options first, call-time options take precedence
        options = dict(self.opts)
        options.update(kwargs)
        if 'source' not in options:
            options['source'] = self.location

        log = logging.getLogger(__name__)
        log.debug('loading {}'.format(self))
        return load_fn(self.reference, **options)

    def __str__(self):
        template = 'data record {location!r} (loader={loader})'
        return template.format(location=self.location, loader=self.loader)

    def __repr__(self):
        cls_name = self.__class__.__name__
        return '<{} {!r}>'.format(cls_name, self.location)


class DataRecordList(list):

    """
    list of data records with a few conveniences:
    the records can be loaded and merged into a single dataset with
    sandwaves.datasets.griddata.join_grids, and str() prints one record per line
    """

    def join(self, maxcount=10):
        """
        load all records and join them with join_grids

        :param maxcount: largest number of records that may be joined
        :raises ValueError: if the list holds more than maxcount records
        """
        count = len(self)
        if count > maxcount:
            raise ValueError('too many records')
        datasets = []
        for record in self:
            datasets.append(record.load())
        return join_grids(datasets)

    def __str__(self):
        lines = [repr(record) for record in self]
        body = '\n    '.join(lines)
        return 'DataRecordList([\n    {}])'.format(body)

    @property
    def locations(self):
        """locations of all records, in list order"""
        found = []
        for record in self:
            found.append(record.location)
        return found
