### Created by Jaho Koo, IHE Delft, TU Delft, K-water

import numpy as np
import pandas as pd

import tensorflow as tf
from sklearn.cluster import KMeans

# Folder the Excel inputs and the trained .keras models are read from
# (prepended verbatim to each file name, so it must end with a separator).
input_path = "D:\\"
# Folder the generated scenario workbooks are written to
# (prepended verbatim to each file name, so it must end with a separator).
result_path = "D:\\"



def Prepare_data_iter(DC_, B, F, E, min_, max_, t_nm):
    """Build scaled lagged inputs and unscaled multi-step targets for one event.

    The rows of ``DC_`` whose ``'E'`` value equals ``E`` are selected and every
    column except the first one is treated as a feature.  Each feature is
    repeated for lags ``1 .. B-1`` (columns named ``'<name>-<lag>'``) and the
    target column ``t_nm`` is repeated for leads ``1 .. F`` (columns named
    ``'<t_nm>_+<lead>'``).  Rows made incomplete by the shifting are dropped.

    Args:
        DC_: source table; its first column is skipped when collecting
            features and it must contain an ``'E'`` column.
        B: window length; ``B - 1`` lagged copies of the features are added.
        F: number of future steps of ``t_nm`` used as targets.
        E: event identifier matched against ``DC_['E']``.
        min_, max_: bounds used for min-max scaling of the inputs.
        t_nm: name of the target column.

    Returns:
        ``(X_, y_)`` where ``X_`` holds the min-max scaled current and lagged
        features and ``y_`` holds the unscaled ``F`` future target columns.
    """
    feature_names = DC_.columns.tolist()[1:]
    event = DC_[DC_['E'] == E][DC_.columns[1:]]

    # Lag 0 is the event itself; lag n is the same block shifted down n rows.
    pieces = [event.copy()]
    labels = event.columns.tolist()
    for lag in range(1, B):
        pieces.append(event.shift(lag))
        labels = labels + [name + '-{}'.format(lag) for name in feature_names]
    window = pd.concat(pieces, axis=1)
    window.columns = labels

    # 'E' is appended after the inputs and before the targets so that the
    # inputs can be recovered below by slicing off the trailing F + 1 columns.
    window['E'] = E
    target_key = '{}'.format(t_nm)
    for lead in range(1, F + 1):
        window['{}_+{}'.format(t_nm, lead)] = window[target_key].shift(-lead)

    raw = window.dropna()

    # Every column is scaled with the same bounds; the event id is restored
    # afterwards because scaling it is meaningless.
    scaled = (raw - min_) / (max_ - min_)
    scaled['E'] = raw['E']

    X_ = scaled[scaled.columns[:-(F + 1)]]
    y_ = raw[['{}_+{}'.format(t_nm, lead) for lead in range(1, F + 1)]]

    return X_, y_


def call_DC_(only_AR=True):
    """Load the ``DC_training_wavelet.xlsx`` table and the bounds of ``'INF'``.

    The workbook is read from ``input_path``; its first and third columns are
    removed, only rows with ``E > 19`` are kept, rows with missing values are
    dropped and the index is renumbered from zero.

    Args:
        only_AR: when truthy, only the ``'E'`` and ``'INF'`` columns are kept
            (autoregressive set-up); otherwise all remaining columns are kept.

    Returns:
        ``(DC_, nm, min_, max_)``: the cleaned table, the model name that
        matches the chosen set-up, and the minimum and maximum of ``'INF'``
        in the cleaned table.
    """
    DC_ = pd.read_excel(input_path + r"DC_training_wavelet.xlsx")
    DC_ = DC_.drop([DC_.columns[0], DC_.columns[2]], axis=1)
    DC_ = DC_[DC_['E'] > 19].dropna().reset_index(drop=True)

    if only_AR:
        DC_ = DC_[['E', 'INF']].copy()
        nm = 'Deep_BNN_DC_inf_AR'
    else:
        nm = 'Deep_BNN_DC_inf'

    # Reduce only the target column: reducing the whole frame does needless
    # work and depends on every other column being min/max-reducible.
    min_ = DC_['INF'].min()
    max_ = DC_['INF'].max()

    return DC_, nm, min_, max_



def call_MH_(only_AR=True):
    """Load the ``wl_down_events.xlsx`` table and the bounds of ``'mh_wl'``.

    The workbook is read from ``input_path``; its first and third columns are
    removed, only rows with ``E > 19`` are kept, rows with missing values are
    dropped and the index is renumbered from zero.

    Args:
        only_AR: when truthy, only the ``'E'`` and ``'mh_wl'`` columns are
            kept; otherwise all remaining columns are kept.

    Returns:
        ``(DC_, nm, min_, max_)``: the cleaned table, the model name, and the
        minimum and maximum of ``'mh_wl'`` in the cleaned table.
    """
    DC_ = pd.read_excel(input_path + r"wl_down_events.xlsx")
    DC_ = DC_.drop([DC_.columns[0], DC_.columns[2]], axis=1)
    DC_ = DC_[DC_['E'] > 19].dropna().reset_index(drop=True)

    if only_AR:
        DC_ = DC_[['E', 'mh_wl']].copy()

    # The model name is the same for both set-ups.
    nm = 'Deep_BNN_mh_wl'

    # Reduce only the target column: reducing the whole frame does needless
    # work and depends on every other column being min/max-reducible.
    min_ = DC_['mh_wl'].min()
    max_ = DC_['mh_wl'].max()

    return DC_, nm, min_, max_


def call_GP_(only_AR=True):
    """Load the ``wl_down_events.xlsx`` table and the bounds of ``'gp_wl'``.

    The workbook is read from ``input_path``; its first and third columns are
    removed, only rows with ``E > 19`` are kept, rows with missing values are
    dropped and the index is renumbered from zero.

    Args:
        only_AR: when truthy, only the ``'E'`` and ``'gp_wl'`` columns are
            kept; otherwise all remaining columns are kept.

    Returns:
        ``(DC_, nm, min_, max_)``: the cleaned table, the model name, and the
        minimum and maximum of ``'gp_wl'`` in the cleaned table.
    """
    DC_ = pd.read_excel(input_path + r"wl_down_events.xlsx")
    DC_ = DC_.drop([DC_.columns[0], DC_.columns[2]], axis=1)
    DC_ = DC_[DC_['E'] > 19].dropna().reset_index(drop=True)

    if only_AR:
        DC_ = DC_[['E', 'gp_wl']].copy()

    # The model name is the same for both set-ups.
    nm = 'Deep_BNN_gp_wl'

    # Reduce only the target column: reducing the whole frame does needless
    # work and depends on every other column being min/max-reducible.
    min_ = DC_['gp_wl'].min()
    max_ = DC_['gp_wl'].max()

    return DC_, nm, min_, max_


def Scenarios_DC_2S_(DC_, E, F, k, B=11):
    """Select inflow scenarios for one input row with a two-stage K-means.

    The saved ``Deep_BNN_DC_inf_AR_opt_F12.keras`` model is evaluated 1000
    times on the same input row.  The resulting forecasts are first grouped
    into 5 clusters by their total (sum over the F steps); inside each of
    those groups the F-step forecasts are clustered again into 5 clusters and
    up to 5 members nearest to a second-stage centre are kept per cluster.

    Args:
        DC_: table passed on to ``Prepare_data`` and ``Prepare_data_iter``.
        E: event identifier passed to ``Prepare_data_iter``.
        F: number of forecast steps (width of one prediction).
        k: *positional* row of the prepared inputs to predict from
            (``iloc`` is used here, unlike the ``*_1S_`` functions, which
            use ``loc``).
        B: window length passed to the data-preparation helpers.

    Returns:
        2-D NumPy array with one row per selected scenario and the F forecast
        values as columns.
    """

    # NOTE(review): `Prepare_data` is not defined in the visible part of this
    # file; it is expected to return six values, the last two being the
    # scaling bounds -- confirm where it comes from.
    _, _, _, _, min_, max_ = Prepare_data(DC_=DC_, B=B, F=F)
    X_, y_ = Prepare_data_iter(DC_, B, F, E, min_, max_, t_nm='INF')
    DC_inf_nn = tf.keras.models.load_model(input_path + "Deep_BNN_DC_inf_AR_opt_F12.keras")

    # Repeated forward passes on the same row; presumably the network is
    # stochastic so every call yields a different sample -- TODO confirm.
    T = 1000
    predictions = []
    for _ in range(T):
        predictions += [DC_inf_nn.predict([X_.iloc[k].to_list()], verbose=0)]

    # One row per forward pass, one column per forecast step, plus the row total.
    predictions = np.array(predictions).reshape(T, F)
    P_ = pd.DataFrame(predictions)
    P_['sum'] = P_.sum(axis=1)

    P__ = P_.copy()


    ### ======== 1st stage: clustering based on sum of inflow ======================
    km_1 = KMeans(n_clusters=5)
    km_1.fit(P__['sum'].to_numpy().reshape(len(P__), 1))
    P__['NC'] = km_1.labels_

    ### ======== 2nd stage: clustering based on inflow pattern ======================
    Scens = []
    for i in range(5):
        tmp_ = P__[P__['NC']==i]
        # P__.columns[:-2] drops 'sum' and 'NC', leaving the F forecast columns.
        # NOTE(review): a first-stage cluster with fewer than 5 members cannot
        # be split into 5 sub-clusters -- confirm this cannot occur.
        km_2 = KMeans(n_clusters=5)
        km_2.fit(tmp_[P__.columns[:-2]])
        tmp__ = tmp_.copy()
        tmp__['NC2_'] = km_2.labels_
        # Distance of every member to its nearest second-stage centre.
        tmp__['NC2d'] = km_2.transform(tmp_[P__.columns[:-2]].to_numpy()).min(axis=1)
        for j in range(5):
            tmp2_ = tmp__[tmp__['NC2_'] == j]
            # Keep at most the 5 members closest to the centre of sub-cluster j.
            S__ = tmp2_.sort_values('NC2d').iloc[:5].to_numpy().tolist()
            Scens += S__

    # Strip the four bookkeeping columns ('sum', 'NC', 'NC2_', 'NC2d').
    uncertain_inf = np.array(Scens)[:, :-4]

    return uncertain_inf


def Scenarios_DC_1S_(DC_inf_nn, X_, F, k, n_clusters = 5, T_scenarios = 10, type_ = 'close'):
    """Build weighted scenarios from repeated predictions by one-stage K-means.

    The model is evaluated 1000 times on the input row labelled ``k``; the
    F-step predictions are grouped with K-means and, for every cluster, the
    ``T_scenarios - 1`` members closest to (``'close'``) or farthest from
    (``'far'``) the cluster centre are kept together with the cluster mean.

    Args:
        DC_inf_nn: object exposing ``predict(x, verbose=0)``.
        X_: prepared inputs; the row is selected by label with ``X_.loc[k]``.
        F: number of forecast steps (width of one prediction).
        k: index label of the input row.
        n_clusters: number of K-means clusters.
        T_scenarios: scenarios per cluster, the cluster mean included.
        type_: ``'close'`` or ``'far'``.

    Returns:
        DataFrame with the columns ``0 .. F-1`` (forecast), ``'sum'`` (row
        total), ``'C'`` (cluster label), ``'D'`` (distance to the nearest
        cluster centre, 0 for the cluster mean) and ``'P'`` (share of the
        cluster among all predictions divided by the number of scenarios
        taken from that cluster).

    Raises:
        ValueError: if ``type_`` is neither ``'close'`` nor ``'far'``.
    """
    # Validate before the expensive sampling loop; previously an unknown
    # value only failed later with an unbound-variable error.
    if type_ == 'close':
        asc = True
    elif type_ == 'far':
        asc = False
    else:
        raise ValueError("type_ must be 'close' or 'far', got {!r}".format(type_))

    T = 1000
    # The input row does not change between forward passes, so build it once.
    x_in = [X_.loc[k].to_list()]
    predictions = [DC_inf_nn.predict(x_in, verbose=0) for _ in range(T)]

    step_cols = list(range(F))
    P__ = pd.DataFrame(np.array(predictions).reshape(T, F))
    P__['sum'] = P__.sum(axis=1)

    ### ========  clustering ======================
    km_1 = KMeans(n_clusters=n_clusters)
    km_1.fit(P__[step_cols].to_numpy())
    P__['NC'] = km_1.labels_
    # Distance of every prediction to its nearest cluster centre.
    P__['NCd'] = km_1.transform(P__[step_cols].to_numpy()).min(axis=1)

    Scens_ls = []
    for i in range(n_clusters):
        members = P__[P__['NC'] == i]
        # The cluster mean was computed but never stored before, which made
        # the lookup below fail with KeyError on every call.
        centroid = members[step_cols].mean().tolist()

        rows = members.sort_values('NCd', ascending=asc).iloc[:T_scenarios - 1].to_numpy().tolist()
        # The cluster mean is added as one more scenario with distance 0.
        rows.append(centroid + [sum(centroid), i, 0])

        # The cluster's share of all predictions is split evenly over its scenarios.
        p_cs = len(members) / len(P__) / len(rows)
        Scens_ls += [row + [p_cs] for row in rows]

    Scens_df = pd.DataFrame(np.array(Scens_ls))
    Scens_df.columns = step_cols + ['sum', 'C', 'D', 'P']

    return Scens_df


def Scenarios_GP_1S_(GP_nn, X_, E, F, k, n_clusters = 5, T_scenarios = 10, type_ = 'close'):
    """Build weighted scenarios from repeated predictions by one-stage K-means.

    The model is evaluated 1000 times on the input row labelled ``k``; the
    F-step predictions are grouped with K-means and, for every cluster, the
    ``T_scenarios - 1`` members closest to (``'close'``) or farthest from
    (``'far'``) the cluster centre are kept together with the cluster mean.

    Args:
        GP_nn: object exposing ``predict(x, verbose=0)``.
        X_: prepared inputs; the row is selected by label with ``X_.loc[k]``.
        E: not used by this function; kept for signature compatibility.
        F: number of forecast steps (width of one prediction).
        k: index label of the input row.
        n_clusters: number of K-means clusters.
        T_scenarios: scenarios per cluster, the cluster mean included.
        type_: ``'close'`` or ``'far'``.

    Returns:
        DataFrame with the columns ``0 .. F-1`` (forecast), ``'sum'`` (row
        total), ``'C'`` (cluster label), ``'D'`` (distance to the nearest
        cluster centre, 0 for the cluster mean) and ``'P'`` (share of the
        cluster among all predictions divided by the number of scenarios
        taken from that cluster).

    Raises:
        ValueError: if ``type_`` is neither ``'close'`` nor ``'far'``.
    """
    # Validate before the expensive sampling loop; previously an unknown
    # value only failed later with an unbound-variable error.
    if type_ == 'close':
        asc = True
    elif type_ == 'far':
        asc = False
    else:
        raise ValueError("type_ must be 'close' or 'far', got {!r}".format(type_))

    T = 1000
    # The input row does not change between forward passes, so build it once.
    x_in = [X_.loc[k].to_list()]
    predictions = [GP_nn.predict(x_in, verbose=0) for _ in range(T)]

    step_cols = list(range(F))
    P__ = pd.DataFrame(np.array(predictions).reshape(T, F))
    P__['sum'] = P__.sum(axis=1)

    ### ========  clustering ======================
    km_1 = KMeans(n_clusters=n_clusters)
    km_1.fit(P__[step_cols].to_numpy())
    P__['NC'] = km_1.labels_
    # Distance of every prediction to its nearest cluster centre.
    P__['NCd'] = km_1.transform(P__[step_cols].to_numpy()).min(axis=1)

    Scens_ls = []
    for i in range(n_clusters):
        members = P__[P__['NC'] == i]
        centroid = members[step_cols].mean().tolist()

        rows = members.sort_values('NCd', ascending=asc).iloc[:T_scenarios - 1].to_numpy().tolist()
        # The cluster mean is added as one more scenario with distance 0.
        rows.append(centroid + [sum(centroid), i, 0])

        # The cluster's share of all predictions is split evenly over its scenarios.
        p_cs = len(members) / len(P__) / len(rows)
        Scens_ls += [row + [p_cs] for row in rows]

    Scens_df = pd.DataFrame(np.array(Scens_ls))
    Scens_df.columns = step_cols + ['sum', 'C', 'D', 'P']

    return Scens_df


def Scenarios_MH_1S_(MH_nn, X_, E, F, k, n_clusters = 5, T_scenarios = 10, type_ = 'close'):
    """Build weighted scenarios from repeated predictions by one-stage K-means.

    The model is evaluated 1000 times on the input row labelled ``k``; the
    F-step predictions are grouped with K-means and, for every cluster, the
    ``T_scenarios - 1`` members closest to (``'close'``) or farthest from
    (``'far'``) the cluster centre are kept together with the cluster mean.

    Args:
        MH_nn: object exposing ``predict(x, verbose=0)``.
        X_: prepared inputs; the row is selected by label with ``X_.loc[k]``.
        E: not used by this function; kept for signature compatibility.
        F: number of forecast steps (width of one prediction).
        k: index label of the input row.
        n_clusters: number of K-means clusters.
        T_scenarios: scenarios per cluster, the cluster mean included.
        type_: ``'close'`` or ``'far'``.

    Returns:
        DataFrame with the columns ``0 .. F-1`` (forecast), ``'sum'`` (row
        total), ``'C'`` (cluster label), ``'D'`` (distance to the nearest
        cluster centre, 0 for the cluster mean) and ``'P'`` (share of the
        cluster among all predictions divided by the number of scenarios
        taken from that cluster).

    Raises:
        ValueError: if ``type_`` is neither ``'close'`` nor ``'far'``.
    """
    # Validate before the expensive sampling loop; previously an unknown
    # value only failed later with an unbound-variable error.
    if type_ == 'close':
        asc = True
    elif type_ == 'far':
        asc = False
    else:
        raise ValueError("type_ must be 'close' or 'far', got {!r}".format(type_))

    T = 1000
    # The input row does not change between forward passes, so build it once.
    x_in = [X_.loc[k].to_list()]
    predictions = [MH_nn.predict(x_in, verbose=0) for _ in range(T)]

    step_cols = list(range(F))
    P__ = pd.DataFrame(np.array(predictions).reshape(T, F))
    P__['sum'] = P__.sum(axis=1)

    ### ========  clustering ======================
    km_1 = KMeans(n_clusters=n_clusters)
    km_1.fit(P__[step_cols].to_numpy())
    P__['NC'] = km_1.labels_
    # Distance of every prediction to its nearest cluster centre.
    P__['NCd'] = km_1.transform(P__[step_cols].to_numpy()).min(axis=1)

    Scens_ls = []
    for i in range(n_clusters):
        members = P__[P__['NC'] == i]
        centroid = members[step_cols].mean().tolist()

        rows = members.sort_values('NCd', ascending=asc).iloc[:T_scenarios - 1].to_numpy().tolist()
        # The cluster mean is added as one more scenario with distance 0.
        rows.append(centroid + [sum(centroid), i, 0])

        # The cluster's share of all predictions is split evenly over its scenarios.
        p_cs = len(members) / len(P__) / len(rows)
        Scens_ls += [row + [p_cs] for row in rows]

    Scens_df = pd.DataFrame(np.array(Scens_ls))
    Scens_df.columns = step_cols + ['sum', 'C', 'D', 'P']

    return Scens_df



def mean_Scenarios_1S_(B_nn, X_, E, F, k, T = 1000):
    """Average ``T`` repeated predictions for the input row labelled ``k``.

    Args:
        B_nn: object exposing ``predict(x, verbose=0)``.
        X_: prepared inputs; the row is selected by label with ``X_.loc[k]``.
        E: not used by this function.
        F: number of forecast steps (width of one prediction).
        k: index label of the input row.
        T: number of forward passes to average.

    Returns:
        1-D NumPy array of length ``F`` holding the per-step mean.
    """
    draws = [B_nn.predict([X_.loc[k].to_list()], verbose=0) for _ in range(T)]

    # One row per forward pass, one column per forecast step.
    stacked = np.array(draws).reshape(T, F)

    return pd.DataFrame(stacked).mean().to_numpy()


def cal_pre_bnn_inputs(F, E, target_, type_, n_clusters, T_scenarios):
    """Generate and save one scenario workbook per usable input row of an event.

    All three data sets are loaded and prepared so that the range of row
    labels common to them can be determined; scenarios are then produced for
    ``target_`` only and each result is written to ``result_path`` as an
    Excel file.

    Args:
        F: number of forecast steps.
        E: event identifier.
        target_: ``'DC'``, ``'GP'`` or ``'MH'``.
        type_: ``'close'`` or ``'far'``, forwarded to the scenario generator.
        n_clusters: number of K-means clusters, forwarded unchanged.
        T_scenarios: scenarios per cluster, forwarded unchanged.

    Raises:
        ValueError: if ``target_`` is not one of the supported names.
    """
    # Fail before any file is read; previously an unknown target only
    # surfaced as an unbound-variable error inside the loop.
    if target_ not in ('DC', 'GP', 'MH'):
        raise ValueError("target_ must be 'DC', 'GP' or 'MH', got {!r}".format(target_))

    DC_, _, DC_min_, DC_max_ = call_DC_()
    GP_, _, GP_min_, GP_max_ = call_GP_()
    MH_, _, MH_min_, MH_max_ = call_MH_()

    # Window lengths per data set.
    # NOTE(review): presumably these match the input widths the saved models
    # were trained with -- confirm.
    DC_B = 8
    GP_B = 9
    MH_B = 3

    DC_X_, _ = Prepare_data_iter(DC_, DC_B, F, E, DC_min_, DC_max_, t_nm='INF')
    GP_X_, _ = Prepare_data_iter(GP_, GP_B, F, E, GP_min_, GP_max_, t_nm='gp_wl')
    MH_X_, _ = Prepare_data_iter(MH_, MH_B, F, E, MH_min_, MH_max_, t_nm='mh_wl')

    # Only row labels available in all three prepared inputs are processed.
    ss_k = max(DC_X_.index[0], GP_X_.index[0], MH_X_.index[0])
    ee_k = min(DC_X_.index[-1], GP_X_.index[-1], MH_X_.index[-1])

    if target_ == 'DC':
        nn = tf.keras.models.load_model(input_path + "Deep_BNN_DC_inf_AR_opt_F12.keras")
        X_ = DC_X_
        target_nm_ = 'INF'
    elif target_ == 'GP':
        nn = tf.keras.models.load_model(input_path + "Deep_BNN_gp_wl_opt_F12.keras")
        X_ = GP_X_
        target_nm_ = 'wl'
    else:
        nn = tf.keras.models.load_model(input_path + "Deep_BNN_mh_wl_opt_F12.keras")
        X_ = MH_X_
        target_nm_ = 'wl'

    for k in range(ss_k, ee_k + 1):
        # The previous call passed E and F positionally into the F and k
        # slots of Scenarios_DC_1S_, repeated k as a keyword and added the
        # unsupported keywords B and plot_, so it raised TypeError.  Each
        # target now uses its own generator with that generator's signature.
        if target_ == 'DC':
            U_1 = Scenarios_DC_1S_(nn, X_, F, k, n_clusters=n_clusters,
                                   T_scenarios=T_scenarios, type_=type_)
        elif target_ == 'GP':
            U_1 = Scenarios_GP_1S_(nn, X_, E, F, k, n_clusters=n_clusters,
                                   T_scenarios=T_scenarios, type_=type_)
        else:
            U_1 = Scenarios_MH_1S_(nn, X_, E, F, k, n_clusters=n_clusters,
                                   T_scenarios=T_scenarios, type_=type_)
        U_1.to_excel(result_path + '{}_F{}_E{}_{}_{}_{}_C{}_T{}.xlsx'.format(target_, F, E, target_nm_, k, type_, n_clusters, T_scenarios))

