# Outputs of this script:
# - a dataframe with indicators excluding Gbullung
# - a dataframe with indicators excluding Gbullung containing only the indicator data with the date

import numpy as np
import datetime
from datetime import timedelta
import pandas as pd
from dateutil.relativedelta import relativedelta

# Load the complete indicator dataset (semicolon separated, dates written as
# day-month-year).
indicator_complete = pd.read_csv(
    'C:/Users/joepb/PycharmProjects/start_data/farmers/Indicators - Copy.csv',
    sep=';',
)
indicator_complete['Date'] = pd.to_datetime(indicator_complete['Date'], format='%d-%m-%Y')
# Missing values are replaced by 0 in every column.
indicator_complete = indicator_complete.fillna(0)

# Remove all Gbullung and Diari observations. The Gbullung rows are kept in a
# separate frame before they are removed from the main one.
Gbullung_obs = indicator_complete.loc[indicator_complete['Community'] == 'Gbullung']
is_excluded = (
    (indicator_complete['Community'] == 'Gbullung')
    | (indicator_complete['Community'] == 'Diari')
)
# drop=True renumbers the remaining rows 0..n-1 without adding the old index
# as a column.
indicator_complete = indicator_complete.loc[~is_excluded].reset_index(drop=True)

# Save the CSV. No index=False here, so the row index is written as the first
# column of this file.
indicator_complete.to_csv('C:/Users/joepb/PycharmProjects/data_storage/indicator_excl_gbullung.csv')

# Drop the unnecessary columns and move the Date column to the end.
indicator_data = indicator_complete.drop(
    ['Community', 'How many mm did the rain gauge indicate?'],
    axis=1,
)
new_cols = [col for col in indicator_data.columns if col != 'Date'] + ['Date']
indicator_data = indicator_data[new_cols]

# Save this dataframe.
indicator_data.to_csv('C:/Users/joepb/PycharmProjects/data_storage/indicator_data_only.csv', index=False)

# Select only the rows dated before 2020-11-01 (the 2020 dataset used for
# analysis). The explicit copy makes the Date assignment below operate on an
# independent frame instead of on a slice of indicator_data.
indicator_data_2020 = indicator_data.loc[indicator_data['Date'] < '2020-11-01 00:00:00'].copy()
indicator_data_2020['Date'] = pd.to_datetime(indicator_data_2020['Date'])
indicator_data_2020.to_csv('C:/Users/joepb/PycharmProjects/data_storage/indicator_data_only_2020.csv', index=False)

#---------------------------------------------------------
#Make extra columns with same-day rain, next day rain, 2 days rain, 3 days rain
#---------------------------------------------------------

def _build_time_to_rain(indicators, observations):
    """Flag, per indicator row, whether rain was observed 0 to 3 days later.

    Args:
        indicators: DataFrame with a 'Date' column.
        observations: DataFrame with a 'datetime' column and a 'value'
            column; a value greater than 0 counts as rain.

    Returns:
        DataFrame with the columns 'sameday', '1 day', '2 day', '3 day' and
        'datetime'. It has one row for every indicator row whose 'Date'
        equals the 'datetime' of at least one observation; indicator rows
        without such a same-day observation are left out. Each flag is 1 when
        the observation on that day has value > 0 and 0 otherwise, including
        when there is no observation for that day. The row labels are the
        positions of the rows within `indicators`.
    """
    # Map every observation timestamp to its rain flag. When a timestamp
    # occurs more than once, the last observation wins. Missing timestamps
    # are skipped because they can never equal an indicator date.
    rain_flag_by_day = {
        when: int(amount > 0)
        for when, amount in zip(observations['datetime'], observations['value'])
        if pd.notna(when)
    }

    lag_by_column = {'sameday': 0, '1 day': 1, '2 day': 2, '3 day': 3}
    positions = []
    records = []
    # Iterating over the values (instead of .loc[i, ...]) makes the result
    # independent of the index labels of `indicators`, so a filtered frame
    # whose index was not reset is handled as well.
    for position, day in enumerate(indicators['Date']):
        if day not in rain_flag_by_day:
            continue
        record = {
            column: rain_flag_by_day.get(day + timedelta(days=lag), 0)
            for column, lag in lag_by_column.items()
        }
        record['datetime'] = day
        positions.append(position)
        records.append(record)

    return pd.DataFrame(
        records,
        index=positions,
        columns=['sameday', '1 day', '2 day', '3 day', 'datetime'],
    )


#load the observations dataset
complete_observations = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/Farmer_observations.csv')
complete_observations['datetime'] = pd.to_datetime(complete_observations['datetime'])

#if you only want to make the df_timetorain dataframe for 2020 use this line
# complete_observations = complete_observations.loc[complete_observations['datetime'] < '2020-11-01 00:00:00']

#This makes a dataframe which shows how long it will take until rain will fall. Eventually, only the sameday column was used in this thesis.
df_timetorain = _build_time_to_rain(indicator_complete, complete_observations)
df_timetorain.to_csv('C:/Users/joepb/PycharmProjects/data_storage/time_to_rain.csv', index = False)


#2020 only------------------

# Same computation restricted to indicator rows dated before 2020-11-01,
# using the separate 2020 observations file.
indicator_complete_2020 = indicator_complete.loc[indicator_complete['Date'] < '2020-11-01 00:00:00']

complete_observations_2020 = pd.read_csv('C:/Users/joepb/PycharmProjects/data_storage/Farmer_observations_2020.csv')
complete_observations_2020['datetime'] = pd.to_datetime(complete_observations_2020['datetime'])

df_timetorain_2020 = _build_time_to_rain(indicator_complete_2020, complete_observations_2020)
# Kept from the original script: guarantees a datetime column even when the
# result has no rows.
df_timetorain_2020['datetime'] = pd.to_datetime(df_timetorain_2020['datetime'])
df_timetorain_2020.to_csv('C:/Users/joepb/PycharmProjects/data_storage/time_to_rain_2020.csv', index = False)

