import pandas as pd
import xarray as xr
from datetime import datetime, timedelta  # Correct import statement

# Load the point records from the Excel workbook. Rows sharing the same
# date/latitude/longitude are collapsed to their last occurrence and the
# index is renumbered afterwards.
file = './points.xlsx'
data = pd.read_excel(file)

df = data.drop_duplicates(
    subset=['date', 'latitude', 'longitude'],
    keep='last',
)
df = df.reset_index(drop=True)

# One inner list per remaining row, values in column order.
data_list = df.values.tolist()

# Paths of the MSWEP 3-hourly NetCDF files, one name per file. The names on
# the left pair up positionally with the paths on the right.
(
    dataset_dir_1,
    dataset_dir_2,
    dataset_dir_3,
    dataset_dir_4,
    dataset_dir_5,
    dataset_dir_6,
) = (
    "/NAS_Public/MSWEP/3hourly/1979.nc",
    "/NAS_Public/MSWEP/3hourly/1980_1989.nc",
    "/NAS_Public/MSWEP/3hourly/1990_1999.nc",
    "/NAS_Public/MSWEP/3hourly/2000_2009.nc",
    "/NAS_Public/MSWEP/3hourly/2010_2019.nc",
    "/NAS_Public/MSWEP/3hourly/2020_2023.nc",
)

def select_dataset(year):
    """Open and return the dataset whose file covers ``year``.

    Args:
        year: Calendar year to look up.

    Returns:
        The result of ``xr.open_dataset`` on the matching file path.

    Raises:
        ValueError: If none of the files covers ``year`` (the files span
            1979 through 2023).
    """
    # Inclusive (first_year, last_year) covered by dataset_dir_1 .. dataset_dir_6.
    coverage = (
        (1979, 1979),
        (1980, 1989),
        (1990, 1999),
        (2000, 2009),
        (2010, 2019),
        (2020, 2023),
    )
    slot = next(
        (
            index
            for index, (first_year, last_year) in enumerate(coverage)
            if first_year <= year <= last_year
        ),
        None,
    )
    if slot is None:
        raise ValueError(f"No dataset available for the year {year}")

    paths = (
        dataset_dir_1,
        dataset_dir_2,
        dataset_dir_3,
        dataset_dir_4,
        dataset_dir_5,
        dataset_dir_6,
    )
    return xr.open_dataset(paths[slot])

# Cut a small space-time window out of the dataset around every point and
# write each window to its own NetCDF file.
#
# NOTE(review): the `.rio` accessor used below comes from the rioxarray
# package and is only registered once `rioxarray` has been imported; no such
# import is visible in this file - confirm one exists before running.
# NOTE(review): only the file matching the point's own year is opened, so a
# time window that reaches across a file boundary (e.g. a date in the first
# days of 1980 or 1990) is presumably cut short there - confirm whether that
# is acceptable.
BOX_HALF_WIDTH_DEG = 0.15          # half-width of the clip box (CRS is EPSG:4326)
LOOKBACK = timedelta(hours=75)     # how far before the point's date to start
LOOKAHEAD = timedelta(hours=50)    # how far after the point's date to stop

for record in data_list:
    # Fields are read by position; any further columns are ignored.
    point_id, date_point, lat, lon, country = record[:5]

    year = date_point.year
    output_filename = f"{point_id}-{country}.nc"

    # Use the dataset as a context manager so its file handle is released
    # after every record instead of accumulating one open file per point.
    with select_dataset(year) as ds:
        ds.rio.write_crs("epsg:4326", inplace=True)

        # The window is anchored at midnight of the point's date.
        target_date = datetime(year, date_point.month, date_point.day)
        date_slice = slice(
            str(target_date - LOOKBACK),
            str(target_date + LOOKAHEAD),
        )

        # Restrict to the time window first, then to the bounding box.
        dds = ds.sel(time=date_slice)
        subset = dds.rio.clip_box(
            minx=lon - BOX_HALF_WIDTH_DEG,
            miny=lat - BOX_HALF_WIDTH_DEG,
            maxx=lon + BOX_HALF_WIDTH_DEG,
            maxy=lat + BOX_HALF_WIDTH_DEG,
        )

        # Write while the source dataset is still open.
        subset.to_netcdf(output_filename, mode="w")

    print(f"Processed and saved: {output_filename}")
