# -*- coding: utf-8 -*-
"""
Using legacy data to reconstruct the past?
Rescue, rigor and reuse in peatland geochronology
  
Cindy Quik1, Ype van der Velde2, Tom Harkema1, Hans van der Plicht3,
Jim Quik4, Roy van Beek1,5, Jakob Wallinga1

1 Soil Geography and Landscape Group,
  Wageningen University, Wageningen, the Netherlands
2 Faculty of Science, Earth and Climate, Vrije Universiteit Amsterdam,
  Amsterdam, the Netherlands
3 Centre for Isotope Research, University of Groningen,
  Groningen, the Netherlands
4 Freelance programmer, Wageningen, the Netherlands
5 Cultural Geography Group, Wageningen University, Wageningen, the Netherlands

Quality assessment script to assess legacy datasets.
See publication for more information.
"""

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Quadrant colours used as patch face colours in quality_QA_plot.
# Each colour is an (R, G, B) tuple; the 8-bit channel values are scaled
# to the 0-1 range by dividing by 255.
red = tuple(channel/255 for channel in (192, 80, 77))
green = tuple(channel/255 for channel in (155, 187, 89))
purple = tuple(channel/255 for channel in (128, 100, 162))
orange = tuple(channel/255 for channel in (247, 150, 70))

def read_weights(filename):
    """
    Load the weights table from a tab delimited text file.

    The file is decoded as ISO-8859-1 and its 'Property' column becomes
    the index of the returned dataframe.
    """
    read_options = {
        'sep': '\t',
        'index_col': 'Property',
        'encoding': "ISO-8859-1",
    }
    return pd.read_csv(filename, **read_options)

def read_data(filename):
    """
    Load the legacy dataset from a tab delimited text file.

    The first two lines of the file are skipped, so the third line
    provides the column names. The file is decoded as ISO-8859-1. Every
    value is kept as a string and entries such as 'NA' are not converted
    to missing values.
    """
    read_options = {
        'sep': '\t',
        'encoding': "ISO-8859-1",
        'skiprows': 2,
        'dtype': str,
        'keep_default_na': False,
    }
    return pd.read_csv(filename, **read_options)

def NA_or_NR(value):
    """Return True when value is one of the markers 'NA' or 'NR'."""
    return value in ('NA', 'NR')

def known(value):
    """Return False for the markers 'NA' and 'NR', True for anything else."""
    is_marker = NA_or_NR(value)
    return not is_marker

def mean_and_SD(Mean14C,SD14C,CalMean,UnknownMean,weight):
    """
    Score the availability of the reported age.

    Returns 0 * weight when both the 14C mean and its SD are known,
    1 * weight when only the 14C mean is known or, failing that, a
    calibrated mean is known, and 2 * weight when only a mean of unknown
    kind is known. When none of the means is known the score is infinite.
    """
    if known(Mean14C):
        # A known 14C mean scores 0 with its SD and 1 without it.
        factor = 0 if known(SD14C) else 1
        return factor * weight
    if known(CalMean):
        return 1 * weight
    if known(UnknownMean):
        return 2 * weight
    return float('Inf')

def delta13(Delta13,Delta13source,weight):
    """
    Score the Delta13 value by its source.

    Returns 0 * weight for a known value with source 'Measured',
    1 * weight for a known value with source 'Estimated' and 2 * weight
    in every other case.
    """
    if not known(Delta13):
        return 2 * weight
    factor_by_source = {'Measured': 0, 'Estimated': 1}
    return factor_by_source.get(Delta13source, 2) * weight

def carbon_content(CarbonContent,weight):
    """Return 0 * weight for a known carbon content, else 1 * weight."""
    factor = 0 if known(CarbonContent) else 1
    return factor * weight

def xy(X,Y,XYcategory,weight):
    """
    Score the coordinates by their category.

    With both X and Y known the score is 0 * weight for category
    'Recorded', 1 * weight for 'Field' and 2 * weight for 'Placename'.
    Unknown coordinates or any other category give an infinite score.
    """
    if not (known(X) and known(Y)):
        return float('Inf')
    factor_by_category = {'Recorded': 0, 'Field': 1, 'Placename': 2}
    if XYcategory in factor_by_category:
        return factor_by_category[XYcategory] * weight
    return float('Inf')

def elevation(DepthBelowSurface,DepthNAP,weight):
    """
    Score the available depth information.

    Returns 0 * weight when DepthNAP is known, 1 * weight when only
    DepthBelowSurface is known and 2 * weight when neither is known.
    """
    if known(DepthNAP):
        factor = 0
    elif known(DepthBelowSurface):
        factor = 1
    else:
        factor = 2
    return factor * weight

def landform(Landform,weight):
    """Return 0 * weight for a known landform, else 2 * weight."""
    factor = 0 if known(Landform) else 2
    return factor * weight

def stratigraphy(Stratigraphy,weight):
    """Return 0 * weight for a known stratigraphy, else 2 * weight."""
    factor = 0 if known(Stratigraphy) else 2
    return factor * weight

def samplethickness(SampleThickness,weight):
    """Return 0 * weight for a known sample thickness, else 1 * weight."""
    factor = 0 if known(SampleThickness) else 1
    return factor * weight

def sampletype(SampleType,weight):
    """
    Score the sample type.

    Returns 0 * weight for 'Macro', 1 * weight for 'Bulk' and 2 * weight
    for any other value.
    """
    factor_by_type = {'Macro': 0, 'Bulk': 1}
    return factor_by_type.get(SampleType, 2) * weight

def speciestype(SpeciesType,weight):
    """
    Score the species type.

    Returns 0 * weight for 'Terrestrial', 1 * weight for 'Aquatic' or
    'Both' and 2 * weight for any other value.
    """
    factor_by_type = {'Terrestrial': 0, 'Aquatic': 1, 'Both': 1}
    return factor_by_type.get(SpeciesType, 2) * weight

def aboveground(Aboveground,weight):
    """
    Score the aboveground indicator.

    Returns 0 * weight for 'Yes', 1 * weight for 'No' and 2 * weight for
    any other value.
    """
    factor_by_answer = {'Yes': 0, 'No': 1}
    return factor_by_answer.get(Aboveground, 2) * weight

def pretreatment(Pretreatment,weight):
    """
    Score the pre-treatment.

    Returns 0 * weight for 'ABA', 1 * weight for 'OnlyA' and 2 * weight
    for any other value.
    """
    factor_by_method = {'ABA': 0, 'OnlyA': 1}
    return factor_by_method.get(Pretreatment, 2) * weight

def quadrant(Qd, QT, Qd_lim, QT_lim):
    """
    Name the quadrant that a (Qd, QT) pair falls in.

    Returns 'green' when both scores are below their limit, 'purple' when
    only Qd is below its limit, 'orange' when only QT is below its limit
    and 'red' otherwise. A score that compares neither below nor at/above
    its limit (NaN) also ends up as 'red'.
    """
    if Qd < Qd_lim:
        if QT < QT_lim:
            return 'green'
        if QT >= QT_lim:
            return 'purple'
    elif Qd >= Qd_lim and QT < QT_lim:
        return 'orange'
    return 'red'

def quality_assessment(data,weights,Qd_lim_rel=.5,QT_lim_rel=.5,normalize=True,
                       normalize_limit=1):
    """
    Apply quality assessment to dataset.

    Every record gets one weighted score per criterion. Seven of these
    scores are summed to ``Qd``, the other five are summed to ``QT`` and
    ``Q`` is the sum of ``Qd`` and ``QT``. Each record is assigned to a
    quadrant by comparing ``Qd`` and ``QT`` with their limits.

    The score columns are added to ``data`` in place and the same
    dataframe is returned. ``weights`` is not modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Dataframe of the dataset.
    weights : pandas.DataFrame
        Dataframe of the weights, indexed by property name and holding
        the numeric weights in the column 'Weight'.
    Qd_lim_rel : float, optional
        Limit of green quadrant, relative to the maximum Qd value.
        The default is 0.5.
    QT_lim_rel : float, optional
        Limit of green quadrant, relative to the maximum QT value.
        The default is 0.5.
    normalize : bool, optional
        Normalize the sum of the weights to the normalization limit.
        The default is True.
    normalize_limit : float, optional
        The normalization limit. This should be a positive number.
        The default is 1.

    Returns
    -------
    data : pandas.DataFrame
        A dataframe of the dataset with the quality scores.
    Qd_lim : float
        Qd limit of green quadrant.
    QT_lim : float
        QT limit of green quadrant.
    Qd_max : float
        Maximum value of Qd, based on weights and normalization.
    QT_max : float
        Maximum value of QT, based on weights and normalization.

    Raises
    ------
    ValueError
        If normalization is requested with a normalization limit that is
        not positive, or with weights that sum to 0.
    KeyError
        If a required property is missing from the weights or a required
        column is missing from the dataset.

    """
    # Each entry: (score group, property in the weights table, name of the
    # score column, scoring function, dataset columns passed to it).
    # The order of the entries is the order in which the score columns are
    # added to the dataset.
    criteria = (
        ('Qd','MeanSD','QA Mean and SD',mean_and_SD,
         ('14CMean','14CSD','CalMean','UnknownMean')),
        ('Qd','Delta13','QA Delta13',delta13,
         ('Delta13','Delta13source')),
        ('Qd','CarbonContent','QA Carbon content',carbon_content,
         ('CarbonContent',)),
        ('QT','XY','QA XY',xy,
         ('X','Y','XYcategory')),
        ('QT','Elevation','QA Elevation',elevation,
         ('DepthBelowSurface','DepthNAP')),
        ('QT','Landform','QA Landform',landform,
         ('Landform',)),
        ('QT','Stratigraphy','QA Stratigraphy',stratigraphy,
         ('Stratigraphy',)),
        ('QT','SampleThickness','QA Sample thickness',samplethickness,
         ('SampleThickness',)),
        ('Qd','SampleType','QA Sample type',sampletype,
         ('SampleType',)),
        ('Qd','SpeciesType','QA Species type',speciestype,
         ('SpeciesType',)),
        ('Qd','Aboveground','QA Aboveground',aboveground,
         ('Aboveground',)),
        ('Qd','Pretreatment','QA Pre-treatment',pretreatment,
         ('Pretreatment',)),
    )

    # Work on a local series so that the caller's weights stay untouched.
    w = weights['Weight']
    if normalize:
        if normalize_limit == 0:
            raise ValueError('The normalization limit cannot be equal to 0.')
        if normalize_limit < 0:
            raise ValueError(
                'The normalization limit must be a positive number.')
        # Every criterion scores at most 2 * weight, hence the factor 2.
        max_weights = sum(w)*2
        if max_weights == 0:
            raise ValueError('The sum of the weights cannot be equal to 0.')
        # Numeric division keeps full precision and does not depend on the
        # number of columns in the weights table.
        w = w.astype(float)/max_weights*normalize_limit

    def group_max(group):
        """Return twice the summed weights of the criteria in a group."""
        return sum(w[prop] for grp,prop,_,_,_ in criteria if grp == group)*2

    def group_total(group):
        """Return the per-record sum of the score columns of a group."""
        return sum(data[col] for grp,_,col,_,_ in criteria if grp == group)

    Qd_max = group_max('Qd')
    QT_max = group_max('QT')
    Qd_lim = Qd_lim_rel*Qd_max
    QT_lim = QT_lim_rel*QT_max

    for _, prop, column, score, inputs in criteria:
        weight = w[prop]
        # Walk the input columns in parallel, one tuple of values per record.
        records = zip(*(data[name] for name in inputs))
        data[column] = [score(*values, weight) for values in records]

    data['Qd'] = group_total('Qd')
    data['QT'] = group_total('QT')
    data['Q'] = data['Qd'] + data['QT']
    data['Quadrant'] = [quadrant(Qd, QT, Qd_lim, QT_lim)
                        for Qd, QT in zip(data['Qd'], data['QT'])]
    return data, Qd_lim, QT_lim, Qd_max, QT_max

def quality_QA_plot(data, Qd_lim, QT_lim, Qd_max, QT_max):
    """
    Generate quality assessment plot

    The records are drawn as black dots with QT on the x-axis and Qd on
    the y-axis, on top of four coloured rectangles that mark the
    quadrants. Both axes run from 0 to the maximum score, so records with
    a score beyond the maximum (for example an infinite score) lie
    outside the plotted area.

    Parameters
    ----------
    data : pandas.DataFrame
        Dataframe of the dataset with quality scores.
    Qd_lim : float
        Qd limit of green quadrant.
    QT_lim : float
        QT limit of green quadrant.
    Qd_max : float
        Maximum value of Qd.
    QT_max : float
        Maximum value of QT.

    Returns
    -------
    fig : matplotlib.figure
        The generated figure.

    """
    fig, axs = plt.subplots(1, 1)
    data.plot(x='QT',y='Qd',kind='scatter',marker='o',ax=axs,c='k',zorder=5)

    # Rectangle takes the lower left corner followed by width and height,
    # so the patches right of and above the limits span the remaining
    # distance up to the maximum rather than the maximum itself.
    QT_rest = QT_max - QT_lim
    Qd_rest = Qd_max - Qd_lim
    axs.add_patch(patches.Rectangle((0,0),QT_lim,Qd_lim,fc=green,zorder=1))
    axs.add_patch(patches.Rectangle((QT_lim,0),QT_rest,Qd_lim,
                                    fc=purple,zorder=2))
    axs.add_patch(patches.Rectangle((0,Qd_lim),QT_lim,Qd_rest,
                                    fc=orange,zorder=3))
    axs.add_patch(patches.Rectangle((QT_lim,Qd_lim),QT_rest,Qd_rest,
                                    fc=red,zorder=4))

    axs.set_aspect('equal', 'box')
    axs.set_xlabel('$Q_T$')
    axs.set_ylabel('$Q_d$')
    axs.set_xlim([0,QT_max])
    axs.set_ylim([0,Qd_max])
    return fig

if __name__ == "__main__":
    # Script entry point: score the legacy dataset with the weights table,
    # report the score maxima and quadrant limits, and save the plot and
    # the scored dataset.
    # All file names are relative to the current working directory.
    weights_filename = "Weights.txt"
    data_filename = "LegacyDataset.txt"
    weights = read_weights(weights_filename)
    data = read_data(data_filename)
    # Run the assessment with its default settings: limits at half of the
    # maximum scores and weights normalized to a limit of 1.
    data, Qd_lim, QT_lim, Qd_max, QT_max= quality_assessment(data,weights)
    # Report the maxima and limits with three significant digits.
    print(f'The maximum value of Qd is {Qd_max:.3}')
    print(f'The maximum value of QT is {QT_max:.3}')
    print(f'The Qd limit of the first quadrant is {Qd_lim:.3}')
    print(f'The QT limit of the first quadrant is {QT_lim:.3}')
    fig = quality_QA_plot(data, Qd_lim, QT_lim, Qd_max, QT_max)
    # Save the figure as a 300 dpi PNG and the scored dataset as a tab
    # delimited text file (the dataframe index is written as first column).
    fig.savefig('QA.png',dpi=300,bbox_inches='tight')
    data.to_csv('QA.txt',sep='\t')