library(tidyverse)
library(sf)
library(ggmap)
library(ggspatial)
library(gtfstools)
library(lubridate)
library(ineq)
library(accessibility)
library(geobr)
library(aopdata)
library(viridis)
library(reldist)
library(stringr)
library(ggrepel)
library(BAMMtools)
library(corrplot)
library(Hmisc)
library(rJava) #needed to install java

# java  settings
options(java.parameters = "-Xmx8G") #20
# Sys.setenv(JAVA_HOME="D:/Program Files/jdk-11")
library(r5r)

#default color options
colors_diff = "plasma"
colors_base = "cividis"

#-1 functions####
breaks_func <- function(df, variab, df_scenarios, i, n_breaks, measure) {
  # creates the values that breaks a vector based on the n_breaks, considering if it is
  #base scenario or not. returns a vector of the same size as the nrow(df), classifying each value,
  #and also the associated breaks scale and labels, to use in ggplot
  if ((df_scenarios$scenario[i]=="base") & (measure!="fos")) { #for the base scenario, maps of different moments have the same scale
    x <- df %>%
      st_drop_geometry() %>%
      filter(urban==T) %>%
      select(starts_with(paste0(ifelse(measure %in% c("sastm", "fos"),"median_" ,"" ),
                                measure))) %>% 
      select(ends_with(df_scenarios$scenario_print[i]))
    
    if (measure %in% c("com","fca", "sastm")) {
      x <- x %>% 
        select(contains(as.character(df_scenarios$thresholds[i]))) %>% 
        select(contains(df_scenarios$opportunities_print[i]))
    }
    
    if (measure=="sastm") {
      if (sa_stm_scenarios$time_marker[i]=="time") {
        x <- x %>% select(contains("time"))
      }else {
        x <- x %>% select(names(x), -contains("time"))
      }
    }
    
    x <- gather(x)
    x <- x$value
    
  } else if ((measure!="fos") &
             (df_scenarios$opportunities_print[i] %in% c("_P", "_E"))) {# diff e difE with same scale
    x <- df %>%
      st_drop_geometry() %>%
      filter(urban==T) %>%
      select(starts_with(paste0(ifelse(measure %in% c("sastm", "fos"),"median_" ,"" ),
                                measure))) %>% 
      select(contains("diff"), contains("difE")) %>% 
      select(contains(as.character(df_scenarios$times[i])))
    
    if (measure %in% c("com","fca", "sastm")) {
      x <- x %>% 
        select(contains(as.character(df_scenarios$thresholds[i])))
    }
    
    if (measure=="sastm") {
      x <- x %>% 
        select(contains(sa_stm_scenarios$time_marker[i]))
    }
    
    x <- gather(x)
    x <- x$value
    
  } else {
    x <- (df %>% st_drop_geometry() %>% filter(urban==T))[, variab]
  }
  
  x[is.infinite(x)] <- 500 #fca returns inf for some cases, and 500 is high enough, without the numeric issues
  breaks <- unique(round_mag(getJenksBreaks(x, n_breaks+1),1))
  
  n_intervals <- length(breaks)-1
  
  df$breaks <- cut((df %>% st_drop_geometry())[, variab],
                   breaks=breaks, include.lowest = TRUE, labels=breaks[2:(n_intervals+1)])
  df$breaks[is.na(df$breaks)] <- levels(df$breaks)[1] #replace NA with the lowest level
  
  breaks_scale <- breaks[2:(n_intervals+1)]
  labels <- rev(paste0(breaks[2:(n_intervals+1)]," - ", breaks[1 :(n_intervals) ]))
  
  return(list("breaks" = df$breaks, "breaks_scale" = breaks_scale, "labels" = labels))
}

color_values <- function(x, n_inter, dir=-1){
  if (x=="base") {
    aux=cividis(n_inter, direction = dir)
  } else{
    aux=plasma(n_inter, direction = dir)
  }
  return(aux)
}

round_up <- function(x, n=0) { #n shows the position after the second relevant number
  # round up to next order of magnitude
  # > round_up(1.112)
  # [1] 1.2
  # > round_up(1.112, 1)
  # [1] 1.12
  # > round_up(0.001112, 1)
  # [1] 0.00112
  # > round_up(0.001112, 2)
  # [1] 0.001112
  # > round_up(0.001112)
  # [1] 0.0012
  order_mag <- floor(log10(x))
    core <- x/10^order_mag
    
    return(ceiling( core*10^(n+1) )/10^(n+1)
      * 10^order_mag)
}

round_mag <- function(x, n=0) { #n shows the position after the second relevant number
  # round to next order of magnitude
  # > round_mag(1.112)
  # [1] 1.1
  # > round_mag(1.112, 1)
  # [1] 1.11
  # > round_mag(0.001112, 1)
  # [1] 0.00111
  # > round_mag(0.001112, 2)
  # [1] 0.001112
  # > round_mag(0.001112)
  # [1] 0.0011
  order_mag <- floor(log10(abs(x)))
  core <- abs(x)/10^order_mag
  sign <- ifelse(x>=0, 1, -1)
  
  return( replace_na( sign * (round( core*10^(n+1) )/10^(n+1)) * 10^order_mag,
                      0))
}

palma_ratio <- function(variabl, income, weight){
#this function calculates the palma ratio of a variable in a population (ratio between average value
#of the top 10% richest over the average value of the bottom 40% )
  #creates df, 
  df <- data.frame(cbind(variabl, income, weight))
  
  #sorts by income
  df <- df[order(-df$income), ]
  df$pop_share <- cumsum(df$weight)/sum(df$weight, na.rm=T)
  
  #calculates ratio
  ratio <- mean(df$variabl[ df$pop_share <= 0.1 ],
                na.rm=T) /   #top 10%
    mean(df$variabl[ df$pop_share >= 0.6 ],
         na.rm=T) #bottom 40%
  
  return(ratio)
}

# 0 Centroids####
zones="OD"

sfc_as_cols <- function(x, names = c("x","y")) {
  stopifnot(inherits(x,"sf") && inherits(sf::st_geometry(x),"sfc_POINT"))
  ret <- sf::st_coordinates(x)
  ret <- tibble::as_tibble(ret)
  stopifnot(length(names) == ncol(ret))
  x <- x[ , !names(x) %in% names]
  ret <- setNames(ret,names)
  dplyr::bind_cols(x,ret)
}

points <- st_read("layers/centroids_zones_coord.shp") %>%
  filter(NumeroMuni == 36) %>%
  rename(idz=NumeroZona, X = xcoord, Y = ycoord) %>%
  select(idz,X,Y)

points$geometry <- NULL

colnames(points) <- c("id","lon","lat")

zones <- st_read("layers/Zonas2017b.shp") %>% 
  filter(NumeroMuni==36) %>%
  st_transform(4326)%>%
  st_make_valid()
colnames(zones)[colnames(zones)=="NumeroZona"] <- "idz"

# 0.1 marking cells in urban areas####
# intersect tracts with zones
census <- read_census_tract(code_tract = 3550308) %>% 
  st_transform(4326)

# calculate for each zone the % of urban area
x2 <- zones  %>%
  st_intersection(census %>% select(zone))

x2$area <- st_area(x2)

zones_type <- x2 %>% 
  st_drop_geometry() %>% 
  group_by(idz, zone) %>% 
  summarise(area=sum(area,na.rm = T))
  
zones_type <- zones_type %>% 
  spread(key = zone, value=area)

zones_type$RURAL <- as.numeric(zones_type$RURAL)
zones_type$URBANO <- as.numeric(zones_type$URBANO)
zones_type[is.na(zones_type)] <- 0 
zones_type$share_urban <- zones_type$URBANO/(zones_type$URBANO + zones_type$RURAL)
zones_type$urban <- (zones_type$share_urban > .25)

zones <- zones %>% 
  left_join(zones_type %>%
              select(idz, urban))

rm(zones_type); rm(x2)

# 1 ANALYSE OD SURVEY####
  # 1.1 travel times analysis####
    OD_SP <- read_delim(file="OD_2017.csv", delim=";") %>% #this file cannot be shared at the 4tu due to the personal data
       filter(MUNI_DOM==36, #home in São Paulo
              MUNITRA1==36, MUNITRA2==36 | is.na(MUNITRA2)) #job(s) in São Paulo
    
    #distribution of trip time (home-work), per income
    x <- OD_SP %>% 
      filter(MOTIVO_O %in% 1:3, MOTIVO_D==8, MODOPRIN %in% 1:6) %>% #TP
      left_join(persons %>% 
                  select(ID_PESS, quantile_income))
    
    ggplot(x,aes(x=quantile_income,y=DURACAO))+
      geom_boxplot()+
      ylab("Duration of the work-home trip [min]")+
      xlab("Income decile")+
      theme_light()
    
    ggsave("figures/p2-time_work-home_trip.png")
    
    rm(x)
    
    OD_SP <- OD_SP %>% 
      mutate(quantile_income = as.factor(ntile(RENDA_FA, 5))) #low quantile = low value
    
    purposes_df <- data.frame(MOTIVO_D = c(1:11),
                              purpose_D = c("Trabalho Indústria",
                                            "Trabalho Comércio",
                                            "Trabalho Serviços",
                                            "Escola/Educação",
                                            "Compras",
                                            "Médico/Dentista/Saúde",
                                            "Recreação/Visitas/Lazer",
                                            "Residência",
                                            "Procurar Emprego",
                                            "Assuntos Pessoais",
                                            "Refeição"),
                              purpose_D_group = c("Work",
                                                  "Work",
                                                  "Work",
                                                  "Education",
                                                  "Other",
                                                  "Other",
                                                  "Leisure",
                                                  "Home",
                                                  "Other",
                                                  "Other",
                                                  "Other"))
    modes_df <- data.frame(MODOPRIN = c(1:17),
                              mode = c("Metrô",
                                        "Trem",
                                        "Monotrilho",
                                        "Ônibus/micro-ônibus/perua do município de São Paulo",
                                        "Ônibus/micro-ônibus/perua de outros municípios",
                                        "Ônibus/micro-ônibus/perua metropolitano",
                                        "Transporte Fretado",
                                        "Transporte Escolar",
                                        "Dirigindo Automóvel",
                                        "Passageiro de Automóvel",
                                        "Táxi Convencional",
                                        "Táxi não Convencional",
                                        "Dirigindo Moto",
                                        "Passageiro de Moto",
                                        "Bicicleta",
                                        "A Pé",
                                        "Outros"),
                           mode_group = c("Motorised collective",
                                          "Motorised collective",
                                          "Motorised collective",
                                          "Motorised collective",
                                          "Motorised collective",
                                          "Motorised collective",
                                          "Motorised collective",
                                          "Motorised collective",
                                          "Motorised individual",
                                          "Motorised individual",
                                          "Motorised individual",
                                          "Motorised individual",
                                          "Motorised individual",
                                          "Motorised individual",
                                          "Non-motorised",
                                          "Non-motorised",
                                          "Others"))
    
    OD_SP <- OD_SP %>% 
      left_join(purposes_df) %>% 
      left_join(modes_df)
    
# 2 CALCULATE TIME MATRIX####
# build network
  #base scenario
  r5r_core <- setup_r5(data_path = "data_base", verbose = FALSE)
  
  #proposed scenario
  r5r_core_prop <- setup_r5(data_path = "data_prop", verbose = FALSE)

  mode <- c("WALK", "TRANSIT")
  max_walk_dist <- 5000 #in meters
  max_trip_duration <- 3*60 # in minutes

  # 2.1 routing####
  #starts the df with the times, calculating for hour = 0  
  # base
    departure_datetime <- as.POSIXct("16-10-2018 00:00:00",
                                     format = "%d-%m-%Y %H:%M:%S")
    
    ttm_df <- travel_time_matrix(r5r_core = r5r_core,
                                     origins = points,
                                     destinations = points,
                                     mode = mode,
                                     departure_datetime = departure_datetime,
                                     time_window = 60,
                                     max_walk_dist = max_walk_dist,
                                     max_trip_duration = max_trip_duration,
                                     verbose = FALSE)
    
    ttm_df$hour <- 0
    
    saveRDS(ttm_df, file="travel_times/ttm_window_0.rds" )
  
  #prop
    ttm_df_prop <- travel_time_matrix(r5r_core = r5r_core_prop,
                                          origins = points,
                                          destinations = points,
                                          mode = mode,
                                          departure_datetime = departure_datetime,
                                          time_window = 60,
                                          max_walk_dist = max_walk_dist,
                                          max_trip_duration = max_trip_duration,
                                          verbose = FALSE)
    
    ttm_df_prop$hour <- 0
    
    saveRDS(ttm_df_prop, file="travel_times/ttm_window_prop_0.rds" )
  
  #calculates for all the hours
  for (dep_hour in 1:23) {
    #base
    departure_datetime <- as.POSIXct(paste0("16-10-2018 ", dep_hour,":00:00"),
                                     format = "%d-%m-%Y %H:%M:%S")
    
    ttm_window <- travel_time_matrix(r5r_core = r5r_core,
                                     origins = points,
                                     destinations = points,
                                     mode = mode,
                                     departure_datetime = departure_datetime,
                                     time_window = 60,
                                     max_walk_dist = max_walk_dist,
                                     max_trip_duration = max_trip_duration,
                                     verbose = FALSE)
    
    ttm_window$hour <- dep_hour
    
    saveRDS(ttm_window, file=paste0("travel_times/ttm_window_",dep_hour ,".rds") )
    
    #prop
    ttm_window_prop <- travel_time_matrix(r5r_core = r5r_core_prop,
                                          origins = points,
                                          destinations = points,
                                          mode = mode,
                                          departure_datetime = departure_datetime,
                                          time_window = 60,
                                          max_walk_dist = max_walk_dist,
                                          max_trip_duration = max_trip_duration,
                                          verbose = FALSE)
    
    ttm_window_prop$hour <- dep_hour
    
    saveRDS(ttm_window_prop, file=paste0("travel_times/ttm_window_prop",dep_hour ,".rds") )
    
  }
  
  # rbind all the times
  ttm_df_prop <- list.files(path = "travel_times/", pattern = "prop", full.names = T) %>%
    map_dfr(readRDS)
  
  x <- list.files(path = "travel_times/", full.names = T)
  x <- x[!grepl("prop", x)]
  
  ttm_df <- x %>%
    map_dfr(readRDS)
  
  saveRDS(ttm_df,"ttm_df.rds")
  saveRDS(ttm_df_prop,"ttm_df_prop.rds")
  
  rm(x)
  
# 2.2 isochrone to test results####
L15_prop <- st_read("layers/L15_prop.shp")
selec_zones <- c("Água Rasa","Parque do Carmo","José Bonifácio","Guaianases","Cidade Tiradentes","Iguatemi","São Rafael","Ipiranga","Vila Prudente","São Lucas","Sapopemba","São Mateus")

# 2.2.1 to Sao Mateus
zones_temp <- zones

travel_times <- ttm_df[toId==264]%>% 
  filter(hour == 17) %>% 
  select(-hour)
zones_temp <- zones_temp %>% 
  left_join(travel_times, by=c("idz" = "fromId"))

travel_times <- ttm_df_prop[toId==264]%>% 
  filter(hour == 17) %>% 
  select(-hour)
zones_temp <- zones_temp %>% 
  left_join(travel_times, by=c("idz" = "fromId"))

zones_temp$tt_gain <- - zones_temp$travel_time.y + zones_temp$travel_time.x

breaks <- c(-1,0,5,10,15,20,25)
n_intervals <- length(breaks)
zones_temp$breaks <- cut((zones_temp %>% st_drop_geometry())[,"tt_gain"], breaks=breaks, include.lowest = TRUE,
                         labels=breaks[2:n_intervals], right = F)
zones_temp$breaks[is.na(zones_temp$breaks)] <- levels(zones_temp$breaks)[1] #replace NA with the lowest level
breaks_scale <- breaks[2:n_intervals]
labels <- rev(paste0(breaks[2:n_intervals]," - ", breaks[1 :(n_intervals-1) ]))

ggplot()+
  geom_sf(data=zones_temp, color="gray", fill="gray") +
  geom_sf(data=zones_temp %>% filter(urban==T), aes(fill=breaks))+
  scale_fill_manual(
    values= color_values("diff"),
    breaks=rev(breaks_scale),
    labels=labels
  )+
  geom_sf(data=zones_temp %>% filter(idz==264), color="red", fill = "red")+
  geom_sf(data=L15_prop, size=1.5)+
  ggmap::theme_nothing(legend=TRUE) +
  labs(x="", y="", fill="Travel time gains to\nSao Mateus (prop - base) [min]") +
  xlim(-46.82, -46.35)+
  ylim(-23.8 , -23.38)+
  theme(legend.justification = c(1, 0), legend.position = c(1, 0))+
  annotation_scale(location = "bl", width_hint = 0.5) +
  annotation_north_arrow(location = "tr", which_north = "true",
                         style = north_arrow_fancy_orienteering)

ggsave("figures/p2-time_gains_sao_mateus.png")

# 2.2.2 to Sé
zones_temp <- zones

travel_times <- ttm_df[toId==1]%>% 
  filter(hour == 17) %>% 
  select(-hour)
zones_temp <- zones_temp %>% 
  left_join(travel_times, by=c("idz" = "fromId"))

travel_times <- ttm_df_prop[toId==1]%>% 
  filter(hour == 17) %>% 
  select(-hour)
zones_temp <- zones_temp %>% 
  left_join(travel_times, by=c("idz" = "fromId"))

zones_temp$tt_gain <- - zones_temp$travel_time.y + zones_temp$travel_time.x

zones_temp$breaks <- cut((zones_temp %>% st_drop_geometry())[,"tt_gain"], breaks=breaks, include.lowest = TRUE,
                         labels=breaks[2:n_intervals], right = F)
zones_temp$breaks[is.na(zones_temp$breaks)] <- levels(zones_temp$breaks)[1] #replace NA with the lowest level
breaks_scale <- breaks[2:n_intervals]
labels <- rev(paste0(breaks[2:n_intervals]," - ", breaks[1 :(n_intervals-1) ]))

ggplot()+
  geom_sf(data=zones_temp, color="gray", fill="gray") +
  geom_sf(data=zones_temp %>% filter(urban==T), aes(fill=breaks))+
  scale_fill_manual(
    values= color_values("diff"),
    breaks=rev(breaks_scale),
    labels=labels
  )+
  geom_sf(data=zones_temp %>% filter(idz==1), color="red", fill = "red")+
  geom_sf(data=L15_prop, size=1.5)+
  ggmap::theme_nothing(legend=TRUE) +
  labs(x="", y="", fill="Travel time gains to\nthe CBD (prop - base) [min]") +
  xlim(-46.82, -46.35)+
  ylim(-23.8 , -23.38)+
  theme(legend.justification = c(1, 0), legend.position = c(1, 0))+
  annotation_scale(location = "bl", width_hint = 0.5) +
  annotation_north_arrow(location = "tr", which_north = "true",
                         style = north_arrow_fancy_orienteering)

ggsave("figures/p2-time_gains_se.png")

rm(zones_temp); rm(travel_times)

#2.2.3 extra: why individual diff is <0 in some cases
x <- ttm_df_prop %>% 
  filter(hour == 17) %>% 
  select(-hour) %>% 
  full_join(ttm_df %>% 
              filter(hour == 17) %>% 
              select(-hour),
            by=c("fromId", "toId"))

sum(x$travel_time.x > (x$travel_time.y+1), na.rm = T) #12258, 172 with 1min added
sum(x$travel_time.x < x$travel_time.y, na.rm = T) #18261 - not much bigger

x$time_increase <- (x$travel_time.x > x$travel_time.y)
x$time_decrease <- (x$travel_time.x < x$travel_time.y)

#solution: do nothing. I thought of considering diff acces=0. also tested proposed = min(proposed,base), but this resulted in an increase in the accessibility throught the city,
# what shows that the r5r is not very steady on its calculations - thus the results be given in min

# 3 OPPORTUNITIES PER ZONE####
  OD_SP <- read_delim(file="OD_2017.csv", delim=";") %>%#this file cannot be shared at the 4tu due to the personal data
    filter(MUNI_DOM==36, #home in São Paulo
           MUNITRA1==36, MUNITRA2==36 | is.na(MUNITRA2)) #job(s) in São Paulo
  
  OD_SP <- OD_SP[,c("ID_PESS", "FE_PESS", "RENDA_FA", "CD_ATIVI", "ZONATRA1", "MUNITRA1", "CO_TR1_X", "CO_TR1_Y", "TRAB1_RE", "TRABEXT1", "OCUP1",
                    "SETOR1", "VINC1", "ZONATRA2", "MUNITRA2", "CO_TR2_X", "CO_TR2_Y", "TRAB2_RE", "TRABEXT2", "OCUP2", "SETOR2",
                    "VINC2", "ZONA", "MUNI_DOM", "CO_DOM_X", "CO_DOM_Y")]
  
  #3.1 parks####
    parks <- st_read("layers/SIRGAS_SHP_parquespde5/SIRGAS_SHP_parquespde5.shp") %>% 
      filter(pde5_esfer %in% c("Parque Estadual de Proteção Integral", "Parque Estadual Urbano", "Parque Municipal Existente"),
             pde5_id!=500) %>%  # tiny polygon with problem
      st_transform(4326)%>%
      st_make_valid()
    
    #parks[201,] is a list of polygons, resulting in some problems. the following lines solve that
    for (i in 2:8) {
      parks[[7]][[201]][[i]] <- NULL  
    }
    for (i in 2:4) {
      parks[[7]][[201]][[i]] <- NULL  
    }
    parks[[7]][[201]][[2]] <- NULL  
    
    parks_time <- read.csv("layers/Area_funcionamento_parques - orig.csv") %>%  #CONTAINS ADJUSTMENTS IN THE NAMES TO MATCH
      select(-ORIG_FID, -OBJECTID_1, -pq_nome_1, -SHAPE_Length, -SHAPE_Area, -Area) %>% 
      rename(open=Abertura, close=Fechamento)
    x <- parks %>% 
      full_join(parks_time, by=c("pde5_nome"="pq_nome"))
    aux_hora_sem_geo <- x %>% 
      filter(is.na(pde5_id))
    aux_geo_sem_hora <- x %>% 
      filter(is.na(open))
    ggplot(x)+
      geom_jitter(aes(open, close)) # for those with no opening hours, we will adopt 6h and 18h
    
    parks <- parks %>% 
      left_join(parks_time, by=c("pde5_nome"="pq_nome"))
    parks$open[is.na(parks$open)] <- 6
    parks$close[is.na(parks$close)] <- 18
    parks$area_p <- as.numeric(st_area(parks)) #area in m2
    
    rm(parks_time); rm(aux_geo_sem_hora); rm(aux_hora_sem_geo)
    
    # adds park areas to zones for each time
    parkhours <- paste0("P", 5:23)
    df <- data.frame( matrix(0, ncol=19, nrow = nrow(parks)) )
    colnames(df) <- parkhours
    parks <- cbind(parks,df)
    colnames(df) <- paste0("E", 5:23)
    parks <- cbind(parks,df)
    rm(df)
    
    close_extended <- 20 # scenario where the parks close at least at 20h
    
    for (hour in 5:23) {# for each hour, marks if it is between open and close hours
      parks[ , paste0("P", hour)] <- ifelse((hour >= parks$open) & (hour < parks$close), 1, 0)
      parks[ , paste0("E", hour)] <- ifelse((hour >= parks$open) & (hour < max(parks$close, close_extended)), 1, 0)
    }
    
    park_zone <- parks  %>%
      st_intersection(zones)
    
    park_zone$area_inter <- as.numeric(st_area(park_zone)) #area in m2
    park_zone <- park_zone %>% 
      st_drop_geometry()
    
    for (hour in 5:23) {
      park_zone[ , paste0("P", hour)] <- park_zone[ , paste0("P", hour)] *
        park_zone$area_inter
      park_zone[ , paste0("E", hour)] <- park_zone[ , paste0("E", hour)] *
        park_zone$area_inter
    }
    
    y <- park_zone %>% 
      group_by(idz) %>% 
      summarise_at(c(paste0("P",5:23), paste0("E",5:23)), sum)
    
    zones <- zones %>% 
      left_join(y)
    
    rm(y)
    
    zones[is.na(zones)] <- 0
    
  #3.2 cultural opportunities####
    theaters <- st_read("layers/EQUIPAMENTOS_SHP_TEMA_CULTURA/SIRGAS_SHP_TEMA_-_CULTURA_TEATRO-CINEMA-SHOW.shp") %>% #cinema, hall, theatre, theatre and hall, theatre and cinema
      st_set_crs(31983) %>%
      st_transform(4326)
  
    museums <- st_read("layers/EQUIPAMENTOS_SHP_TEMA_CULTURA/SIRGAS_SHP_TEMA_-_CULTURA_MUSEUS.shp") %>% #museum 
      st_set_crs(31983) %>%
      st_transform(4326)
    
    cult_spaces <- st_read("layers/EQUIPAMENTOS_SHP_TEMA_CULTURA/SIRGAS_SHP_TEMA_-_CULTURA_ESPACOS_CULTURAIS.shp") %>%  #center and galleries
      st_set_crs(31983) %>%
      st_transform(4326) 
    
    libraries <- st_read("layers/EQUIPAMENTOS_SHP_TEMA_CULTURA/SIRGAS_SHP_TEMA_-_CULTURA_BIBLIOTECAS.shp") %>% #libraries
      st_set_crs(31983) %>%
      st_transform(4326)
    
    cultural_day <- rbind(museums,
                          cult_spaces,
                          libraries,
                          theaters %>%
                            filter(eq_tipo %in% c("SALAS DE CINEMA", "TEATRO E CINEMA")))
    cultural_night <- theaters
    
    x <- st_intersects(zones, cultural_day)
    zones <- cbind(zones,lengths(x))
    colnames(zones)[colnames(zones)=="lengths.x."] <- "cpd"
    
    x <- st_intersects(zones, cultural_night)
    zones <- cbind(zones,lengths(x))
    colnames(zones)[colnames(zones)=="lengths.x."] <- "cpn"
    
    rm(x); rm(theaters); rm(museums); rm(cult_spaces); rm(libraries); rm(cultural_day); rm(cultural_night)
    
    zones$idz <- as.character(zones$idz)
    saveRDS(zones, "zones.rds")
    
# 4 CALCULATE ACCESSIBILITIES####
  # 4.1 COM####
    com_scenarios <- expand.grid(thresholds = c(30, 60, 90),
                                 opportunities = c("P17", "P18", "cpd", "cpn"),
                                 scenario = c("base", "prop"),
                                 times = c(17, 18)) %>% 
      filter((opportunities %in% c("P17", "cpd") & times==17) |
               (opportunities %in% c("P18", "cpn") & times==18)) %>% 
      rbind(expand.grid(thresholds = c(30, 60, 90),
                        opportunities = c("E17", "E18"),
                        scenario = c("exte"),
                        times = c(17, 18)) %>% 
              filter((opportunities %in% c("E17") & times==17) |
                       (opportunities %in% c("E18") & times==18)))
    
    access_com <- data.frame(zones$idz)
    colnames(access_com) <- "idz"
    
    for (i in 1:nrow(com_scenarios)) {
      if (com_scenarios$scenario[i] %in% c("base", "exte") ){
        t_m <- ttm_df
      } else{
        t_m <- ttm_df_prop
      }
      
      com <- cumulative_cutoff(travel_matrix = t_m %>% 
                                 filter(hour == com_scenarios$times[i]) %>% 
                                 select(-hour) %>% 
                                 rename(from_id = fromId,
                                        to_id = toId),
                               land_use_data =  zones %>%
                                 st_drop_geometry() %>% 
                                 rename(id = idz) %>% 
                                 select(id, P17, P18, cpd, cpn, E17, E18),
                               opportunity = as.character(com_scenarios$opportunities[i]),
                               travel_cost = "travel_time",
                               cutoff = com_scenarios$thresholds[i])
      
      if (com_scenarios$opportunities[i] %in% c("P17","P18", "E17", "E18")) {#ratio with total of opportunities - should be total of park areas, not just at the hour
        com[, 2] <- com[, 2]/
          sum(parks$area_p, na.rm = T)
      } else {
        com[, 2] <- com[, 2]/
          as.numeric(sum((zones %>% 
                 st_drop_geometry())[,
                    as.character(com_scenarios$opportunities[i]) ],
              na.rm = T)
          )
      }
      colnames(com)[colnames(com)=="id"] <- "idz"
      colnames(com)[colnames(com)==as.character(com_scenarios$opportunities[i])] <-
        paste("com",
              com_scenarios$thresholds[i], 
              as.character(com_scenarios$opportunities[i]),
              com_scenarios$scenario[i],
              sep = "_")
      
      access_com <- access_com %>% 
        left_join(com)
    }
  
    rm(com)
    
    saveRDS(access_com, "access_com.rds")
    
  # 4.2 balancing time####
    greenarea_per_habitant <- 15
    
    #adds pop to zones
    OD_temp <- read_delim(file="OD_2017.csv", delim=";") %>%#this file cannot be shared at the 4tu due to the personal data
      filter(MUNI_DOM==36) #home in São Paulo
    OD_temp <- OD_temp[,c("ID_PESS", "FE_PESS", "ZONA")]
    colnames(OD_temp)[colnames(OD_temp)=="ZONA"] <- "idz"
    OD_temp$idz <- as.character(OD_temp$idz)
    
    persons_temp <- OD_temp %>% 
      group_by_all() %>% 
      summarise()
    
    rm(OD_temp)
    
    zones <- zones %>% 
      left_join(persons_temp %>% 
                  group_by(idz) %>% 
                  summarise(pop = sum(FE_PESS, na.rm=T)))
    zones$pop <- replace_na(zones$pop,0)
    
    rm(persons_temp)
    
    # bt scenarios - cultural opportunities do not have BT!
    bt_scenarios <- expand.grid(opportunities = c("P17", "P18"),# "cpd", "cpn"),
                                scenario = c("base", "prop"),
                                times = c(17, 18)) %>% 
      filter((opportunities %in% c("P17") & times==17) | #, "cpd"
               (opportunities %in% c("P18") & times==18)) %>% 
      rbind(expand.grid(opportunities = c("E17", "E18"),
                        scenario = c("exte"),
                        times = c(17, 18)) %>% 
              filter((opportunities %in% c("E17") & times==17) |
                       (opportunities %in% c("E18") & times==18))) #, "cpn"
    
    access_bt <- data.frame(zones$idz)
    colnames(access_bt) <- "idz"
    
    # bt function
    balancing_time <- function(travel_matrix, travel_cost, land_use_data, opportunity, population, population_factor){
      # travel_matrix =  fromId/ toId/ travel_time
      # land_use_data =  id / opportunity / population
      # travel_cost = "travel_time",
      # opportunity = "opportunity",
      # population  = "pop",
      # population_factor = greenarea_per_habitant
      
      # for each origin, it registers the destinations with less travel time
      # starting variable with correct dimension, but values that will be changed later
      travel_matrix <- pivot_wider(travel_matrix, names_from = to_id, values_from = travel_time)
      travel_matrix <- travel_matrix[,-1]
      travel_matrix <- as.matrix(travel_matrix) #converts to matrix
      
      OD_time_ord <- travel_matrix
      
      for (i in c(1:nrow(travel_matrix))){
        OD_time_ord[i,] <- rank(travel_matrix[i,], ties.method = "random")
      }
      
      # opportunities at the destinations are summed, in order of proximity, till suggested area is reached, registering time and position when it happens
      # starting variables with correct dimension, but values that will be changed later
      land_use_data$time <- 0
      land_use_data$pos <- 0
      
      for (i in 1:nrow(travel_matrix) ){                                      #controls origin
        for (pos in 1:ncol(travel_matrix) ){                                  #controls position at the order of proximity
          if ( sum( (OD_time_ord[i,] <= pos)* #checks if the sum of number of opportunities in all the zones till # pos is greater then the pop*green area factor
                    land_use_data[, opportunity] ) >=
               land_use_data[i, population] * population_factor ){
            land_use_data$time[i] <- max( (OD_time_ord[i,] == pos) * travel_matrix[i,] , na.rm = TRUE)  #max is used to solve draw cases
            land_use_data$pos[i] <- pos
            break
          }
        }
      }
      
      bt <- land_use_data %>% 
        select(id, time)
      colnames(bt)[2] <- opportunity
      
      return(bt) #dataframe with 2 columns: id and bt
    }
    
    #calculating
    for (i in 1:nrow(bt_scenarios)) {
      if (bt_scenarios$scenario[i]  %in% c("base", "exte") ){
        t_m <- ttm_df
      } else{
        t_m <- ttm_df_prop
      }
      
      bt <- balancing_time(travel_matrix = t_m %>% 
                             filter(hour == bt_scenarios$times[i]) %>% 
                             select(-hour) %>% 
                             rename(from_id = fromId,
                                    to_id = toId),
                           travel_cost = "travel_time",
                           land_use_data =  zones %>%
                             st_drop_geometry() %>% 
                             rename(id = idz) %>% 
                             select(id, P17, P18, E17, E18, cpd, cpn, pop),
                           opportunity = as.character(bt_scenarios$opportunities[i]),
                           population  = "pop",
                           population_factor = greenarea_per_habitant)
      
      colnames(bt)[colnames(bt)=="id"] <- "idz"
      colnames(bt)[colnames(bt)==as.character(bt_scenarios$opportunities[i])] <-
        paste("bt",
              as.character(bt_scenarios$opportunities[i]),
              bt_scenarios$scenario[i],
              #bt_scenarios$times[i],
              sep = "_")
      
      access_bt <- access_bt %>% 
        left_join(bt)
    }
    
    rm(bt)
    
    saveRDS(access_bt, "access_bt.rds")
    
    # 4.3 FCA####
    fca_scenarios <- expand.grid(thresholds = c(30, 60, 90),
                                 opportunities = c("P17", "P18", "cpd", "cpn"),
                                 scenario = c("base", "prop"),
                                 times = c(17, 18)) %>% 
      filter((opportunities %in% c("P17", "cpd") & times==17) |
               (opportunities %in% c("P18", "cpn") & times==18)) %>% 
      rbind(expand.grid(thresholds = c(30, 60, 90),
                        opportunities = c("E17", "E18"),
                        scenario = c("exte"),
                        times = c(17, 18)) %>% 
              filter((opportunities %in% c("E17") & times==17) |
                       (opportunities %in% c("E18") & times==18)))
    
    access_fca <- data.frame(zones$idz)
    colnames(access_fca) <- "idz"
    
    for (i in 1:nrow(fca_scenarios)) {
      if (fca_scenarios$scenario[i] %in% c("base", "exte")){
        t_m <- ttm_df
      } else{
        t_m <- ttm_df_prop
      }
      
      fca <- floating_catchment_area(travel_matrix = t_m %>% 
                                       filter(hour == fca_scenarios$times[i]) %>% 
                                       select(-hour) %>% 
                                       rename(from_id = fromId,
                                              to_id = toId),
                                     land_use_data =  zones %>%
                                       st_drop_geometry() %>% 
                                       rename(id = idz) %>% 
                                       select(id, P17, P18, E17, E18, pop, cpd, cpn),
                                     opportunity = as.character(fca_scenarios$opportunities[i]),
                                     travel_cost = "travel_time",
                                     demand = "pop",
                                     method = "2sfca", #the first metric in the FCA family, originally proposed by Luo and Wang (2003).
                                     decay_function = decay_binary(cutoff=fca_scenarios$thresholds[i]))#fca_scenarios$thresholds[i]
      
      colnames(fca)[colnames(fca)=="id"] <- "idz"
      colnames(fca)[colnames(fca)==as.character(fca_scenarios$opportunities[i])] <-
        paste("fca",
              fca_scenarios$thresholds[i], 
              as.character(fca_scenarios$opportunities[i]),
              fca_scenarios$scenario[i],
              sep = "_")
      
      access_fca <- access_fca %>% 
        left_join(fca)
    }
    
    rm(fca)
    
    saveRDS(access_fca, "access_fca.rds")
    
    #4.4 SA_STM####
    # creates persons table
    persons <- OD_SP %>% 
      group_by(ID_PESS, ZONA, ZONATRA1, RENDA_FA) %>% 
      summarise() %>%
      ungroup() %>% 
      mutate(quantile_income = as.factor(ntile(RENDA_FA, 10)))
    
    saveRDS(persons, "persons.rds")
    
    # SA_STM scenarios
    sa_stm_scenarios <- expand.grid(thresholds = c(60, 90, 120),
                                    opportunities = c("P17", "P18", "cpd", "cpn"),
                                    scenario = c("base", "prop"),
                                    times = c(17, 18)) %>% 
      filter((opportunities %in% c("P17", "cpd") & times==17) |
               (opportunities %in% c("P18", "cpn") & times==18)) %>% 
      rbind(expand.grid(thresholds = c(60, 90, 120),
                        opportunities = c("E17", "E18"),
                        scenario = c("exte"),
                        times = c(17, 18)) %>% 
              filter((opportunities %in% c("E17") & times==17) |
                       (opportunities %in% c("E18") & times==18)))
    
    access_sa_stm <- persons %>% 
      group_by(ID_PESS) %>% 
      summarise()
    
    colnames(access_sa_stm) <- "idp"
            
    # sa_stm function
    sa_stm_f <- function(travel_matrix, travel_cost, land_use_data, opportunity, persons_f, cutoff){
      # travel_matrix =  from_id/ to_id/ travel_time
      # land_use_data =  id / opportunity
      # travel_cost = "travel_time",
      # opportunity = "opportunity",
      # persons_f  = idp / home_location / job_location
      # cutoff = com_scenarios$thresholds[i]
              
      aux <- travel_matrix %>% 
        left_join(travel_matrix,
                  by =c("to_id" = "from_id"))
      
      aux$total_time <- aux$travel_time.x + aux$travel_time.y
      
      aux <- aux %>% 
        filter(total_time <= cutoff) %>% 
        mutate(time_remaining = (cutoff - total_time)) %>% 
        left_join(land_use_data, by= c("to_id" = "id"))
      
      aux$time_opp <- aux$time_remaining * 
        (aux %>% select_at(opportunity))
      
      accessible <- aux %>% 
        group_by(from_id, to_id) %>% 
        summarise_at(c(opportunity, "time_opp"), sum) %>% 
        ungroup()
      
      accessible$from_id <- as.numeric(accessible$from_id)
      accessible$to_id <- as.numeric(accessible$to_id)
      
      persons_f <- persons_f %>% 
        left_join(accessible, by = c("ZONATRA1" = "from_id", "ZONA" = "to_id"))
      
      sastm <- persons_f[, c("idp", opportunity, "time_opp")]
      colnames(sastm)[2] <- opportunity
      colnames(sastm)[3] <- paste0(opportunity,".time")
      
      return(sastm) #dataframe with 3 columns: id , sa_stm , sa_stm_time
    }
            
    #calculating
    for (i in 1:nrow(sa_stm_scenarios)) {
      if (sa_stm_scenarios$scenario[i] %in% c("base", "exte")){
        t_m <- ttm_df
      } else{
        t_m <- ttm_df_prop
      }
      
      sa_stm <- sa_stm_f(travel_matrix = t_m %>% 
                         filter(hour == sa_stm_scenarios$times[i]) %>% 
                         select(-hour) %>% 
                         rename(from_id = fromId,
                                to_id = toId),
                       travel_cost = "travel_time",
                       land_use_data =  zones %>%
                         st_drop_geometry() %>% 
                         rename(id = idz) %>% 
                         select(id, P17, P18, E17, E18, cpd, cpn),
                       opportunity = as.character(sa_stm_scenarios$opportunities[i]),
                       persons_f = persons%>% 
                         select(ID_PESS, ZONA, ZONATRA1) %>% 
                         rename(idp = ID_PESS),
                       cutoff = sa_stm_scenarios$thresholds[i]) 
      
      colnames(sa_stm)[2:3] <-
        paste(colnames(sa_stm)[2:3],
              sa_stm_scenarios$thresholds[i],
              #as.character(sa_stm_scenarios$opportunities[i]),
              sa_stm_scenarios$times[i],
              sa_stm_scenarios$scenario[i],
              sep = "_")
      
      access_sa_stm <- access_sa_stm %>% 
        left_join(sa_stm)
    }
    
    colnames(access_sa_stm)[2:ncol(access_sa_stm)] <-
      paste0("sastm_", colnames(access_sa_stm)[2:ncol(access_sa_stm)])
    
    access_sa_stm[is.na(access_sa_stm)] <- 0
    
    saveRDS(access_sa_stm, "access_sa_stm.rds")
    
  # 4.5 STM - NFOS####
    # 4.5.0 evaluates travel times to parks####
    OD_SP <- read_delim(file="OD_2017.csv", delim=";") %>%#this file cannot be shared at the 4tu due to the personal data
      filter(MUNI_DOM==36, #home in São Paulo
             MUNITRA1==36, MUNITRA2==36 | is.na(MUNITRA2)) #job(s) in São Paulo

    trips <- OD_SP %>% 
      select(ID_PESS, N_VIAG, ZONA_O, ZONA_D, MOTIVO_O, MOTIVO_D, DATA, H_SAIDA, MIN_SAIDA, H_CHEG, MIN_CHEG, TOT_VIAG)
    
    trips$t_i <- as.POSIXct( paste0(trips$DATA, " ", trips$H_SAIDA, ":", trips$MIN_SAIDA, ":00"),
                            format="%d%m%Y %H:%M:%S",tz="UTC")
    trips$t_j <- as.POSIXct( paste0(trips$DATA, " ", trips$H_CHEG, ":", trips$MIN_CHEG, ":00"),
                             format="%d%m%Y %H:%M:%S",tz="UTC")
    
    #first_trip* mark the moment we have the first time after midnight. these trips and the following one(s) must have 1 day added
    aux <- tibble(ID_PESS = trips$ID_PESS,
                  first_trip_after_mn_ti = c(F,
                                             ( trips$t_i[2:nrow(trips)] < trips$t_i[1:(nrow(trips)-1)] ) & # checks if the starting hour of the trips are in order. if not, this means that the last trip(s) strat after midnight
                                               (trips$ID_PESS[2:nrow(trips)] == trips$ID_PESS[1:(nrow(trips)-1)])),
                  first_trip_after_mn_tj = c(F,
                                             ( ( trips$t_j[2:nrow(trips)] < trips$t_j[1:(nrow(trips)-1)] ) & # the same for tj
                                               (trips$ID_PESS[2:nrow(trips)] == trips$ID_PESS[1:(nrow(trips)-1)]) ) |
                                             ( trips$t_j[2:nrow(trips)] < trips$t_i[2:nrow(trips)] ) # besides, it is also possible that the first trip already starts crossing the midnight
                                             ),
                  N_VIAG = trips$N_VIAG)
    
    #add 1 day to those after midnight
    df_ti <- aux %>% 
      filter(first_trip_after_mn_ti==T) %>% 
      group_by(ID_PESS, N_VIAG) %>% 
      summarise() %>% 
      ungroup() %>% 
      rename(first_trip_ti = N_VIAG)
    
    df_tj <- aux %>% 
      filter(first_trip_after_mn_tj==T) %>% 
      group_by(ID_PESS, N_VIAG) %>% 
      summarise() %>% 
      ungroup() %>% 
      rename(first_trip_tj = N_VIAG)
    
    trips <- trips %>% 
      left_join(df_ti) %>% 
      left_join(df_tj)
    
    trips$t_i[replace_na(trips$N_VIAG >= trips$first_trip_ti, F)] <- trips$t_i[replace_na(trips$N_VIAG >= trips$first_trip_ti, F)] + lubridate::days(1)
    trips$t_j[replace_na(trips$N_VIAG >= trips$first_trip_tj, F)] <- trips$t_j[replace_na(trips$N_VIAG >= trips$first_trip_tj, F)] + lubridate::days(1)
    
    rm(df_ti); rm(df_tj); rm(aux)
    
    #time_use show if a person is traveling or in a fixed place
    trips <- trips %>% 
      select(ID_PESS, N_VIAG, ZONA_O, ZONA_D, MOTIVO_O, MOTIVO_D, DATA, t_i, t_j, TOT_VIAG)
    
    time_use <- trips
    
    time_use$type="trip"
    
    # 4.5.1 identify anchors = jobs, education####
    
    # first_created is a way to guarantee an anchor before the first trip, if that is the case
    first_created <- trips %>% 
      filter(N_VIAG==1)# this filters the first trip of each person
    
    first_created$ZONA_D <- first_created$ZONA_O
    first_created$MOTIVO_D <- first_created$MOTIVO_O
    first_created$t_j <- first_created$t_i
    first_created$t_i <- as.POSIXct( paste0(first_created$DATA, " ", "00:00:01"),
                                     format="%d%m%Y %H:%M:%S",tz="UTC")
    
    # last_created is a way to guarantee an anchor after the last trip, if that is the case
    last_created <- trips %>% 
      filter(N_VIAG==TOT_VIAG)# this filters the last trip of each person
    
    last_created$ZONA_O <- last_created$ZONA_D
    last_created$MOTIVO_O <- last_created$MOTIVO_D
    last_created$t_i <- last_created$t_j
    last_created$t_j <- pmax(last_created$t_j,
                            as.POSIXct( paste0(last_created$DATA, " ", "23:59:59"),
                                        format="%d%m%Y %H:%M:%S",tz="UTC"))
    
    #creates stays df
    x <- trips[1 : (nrow(trips)-1), ]
    colnames(x) <- paste0(colnames(x), ".x")
    y <- trips[2 : nrow(trips)    , ]
    colnames(y) <- paste0(colnames(y), ".y")
    
    stays <- cbind(x,y)%>% 
      filter(ID_PESS.x==ID_PESS.y) %>% 
      select(ID_PESS = ID_PESS.x, ZONA_O = ZONA_D.x, ZONA_D = ZONA_D.x,
             MOTIVO_O = MOTIVO_D.x, MOTIVO_D = MOTIVO_D.x, DATA = DATA.x, t_i = t_j.x, t_j = t_i.y)
    
    stays$N_VIAG <- 0
    stays$TOT_VIAG <- 0
    
    stays <- stays %>% 
      rbind(first_created) %>% 
      rbind(last_created)
    
    stays$type <- "stay"
    
    time_use <- time_use %>% 
      rbind(stays)
    
    time_use <- time_use[order(time_use$ID_PESS, time_use$t_i, time_use$t_j), ]
    
    rm(x); rm(y); rm (stays); rm(trips); rm(first_created); rm(last_created)
    
    #creates anchors df, from stays
    time_use$anchor <- (time_use$type =="stay") &
      (time_use$MOTIVO_O %in% c(1,2,3,4)) #1,2,3 = jobs; 4=education; 8=home
    
    time_use$duration_minu <- as.numeric(time_use$t_j - time_use$t_i)/60
      
    #4.5.2 calculates freetime based on the anchors####
    time_use$group <- 1 # group will be used to merge trips/stays that can be used as free time
    
    for (i in 2:nrow(time_use)) {
      time_use$group[i] <- ifelse((time_use$ID_PESS[i] != time_use$ID_PESS[i-1]) | (time_use$anchor[i]==T) | (time_use$anchor[i-1]==T),
                                  time_use$group[i-1] +1,
                                  time_use$group[i-1])
    }
    
    saveRDS(time_use, "time_use.rds")
    
    free_time <- time_use %>%
      filter(anchor==F, !is.na(t_i)) %>% 
      group_by(ID_PESS, group) %>% 
      summarise(t_i=min(t_i),
                t_j=max(t_j)) %>% 
      ungroup()
    
    #zone info comes from joins based on time
    free_time <- free_time %>% 
      left_join(time_use %>% 
                  filter(time_use$duration_minu>0) %>% 
                  select(ID_PESS, group, t_i, ZONA_O)) %>% 
      left_join(time_use %>%  
                  filter(time_use$duration_minu>0) %>% 
                  select(ID_PESS, group, t_j, ZONA_D))
    
    colnames(free_time)[colnames(free_time)=="ZONA_O"] <- "ZONA_D.x"
    colnames(free_time)[colnames(free_time)=="ZONA_D"] <- "ZONA_D.y"
    
    #calcula freetime e hourtfx
    free_time$hour_tfx <- hour(free_time$t_i)
    free_time$free_time <- as.numeric(free_time$t_j - free_time$t_i)/60 #converts to minutes
    
    free_time$ZONA_D.x <- as.character(free_time$ZONA_D.x)
    free_time$ZONA_D.y <- as.character(free_time$ZONA_D.y)
    
    saveRDS(free_time, "free_time.rds")
    
    # 4.5.3 calculates FOS####
      # split in periods because of memory restrictions
        periods <- tibble(tmin=c(0, 5, 7,  9, 11, 13, 15, 17, 19, 21, 23), #cpd 8-18; cpn-18-2; parks 5-24
                          tmax=c(2, 7, 9, 11, 13, 15, 17, 19, 21, 23, 24)) #nothing open 2-5
    
      # 4.5.3.1 creates df with triangulated travel times
      for (scenario in c("base","prop")) {
        for (i in 1:nrow(periods)) { 
          if (scenario == "base" ){
            t_m <- ttm_df
          } else{
            t_m <- ttm_df_prop
          }
          
          #limits travel time 
          x <- t_m %>% 
            filter(travel_time<=2*60, hour>= periods$tmin[i], hour< periods$tmax[i])
          
          aux <- x %>% 
            left_join(x, by =c("hour", "toId" = "fromId"))
          
          rm(x)
          
          aux <- aux %>% 
            filter(travel_time.x + travel_time.y<=3*60) #two trips<=3*60min
          
          aux <- aux %>% 
            mutate(total_time = travel_time.x + travel_time.y) %>% 
            select(-travel_time.x, -travel_time.y)
          
          saveRDS(aux, paste0("time_join_time_",
                              if_else(scenario == "base", "","prop_"),
                              periods$tmin[i],"_",periods$tmax[i],".rds"))  
        }
      }
      rm(t_m); rm(aux)
      
      access_fos <- persons %>% 
        group_by(ID_PESS) %>% 
        summarise()
      
      # 4.5.3.2 includes opportunities and measure accessibilities
      for (scenario in c("base", "prop")) {
        for (i in 1:nrow(periods)) {
          aux <- readRDS(paste0("time_join_time_",
                                if_else(scenario == "base", "", "prop_"),
                                periods$tmin[i], "_", periods$tmax[i], ".rds"))
          
          aux <- aux %>% 
            left_join(zones, by= c("toId" = "idz")) 
          
          x <- free_time %>% 
            left_join(aux, by=c("hour_tfx" = "hour",
                                "ZONA_D.x" = "fromId",
                                "ZONA_D.y" = "toId.y")) %>% 
            filter(total_time <= free_time)
          
          rm(aux)
          
          # sums the parks if they are open at the considered hour
            x$park <- x[, "P5"] * (x$hour_tfx==5) # hours are necessarily within the period due to previous filter
            x$park_time <- x$park * (x$free_time - x$total_time)
            for (hour in 6:23) {
              x$park <- x$park +
                x[, paste0("P",hour)] * (x$hour_tfx==hour)
              x$park_time <- x$park_time+
                x[, paste0("P",hour)] * (x$hour_tfx==hour) * (x$free_time - x$total_time)
            }
          
          # sums the cp's if they are open at the considered hour
            # three cases here: 8h-18h, 18h-2h, 2h-8h. for the last, cp and cp_time are zero:
            x$cp <- 0
            x$cp_time <- 0
            
            #for 8h-18h:
            x$cp[x$hour_tfx >= 8 & x$hour_tfx<18] <-
              x$cpd[x$hour_tfx >= 8 & x$hour_tfx<18]
            x$cp_time[x$hour_tfx >= 8 & x$hour_tfx<18] <-
              x$cpd[x$hour_tfx >= 8 & x$hour_tfx<18] *
              (x$free_time[x$hour_tfx >= 8 & x$hour_tfx<18] -
                 x$total_time[x$hour_tfx >= 8 & x$hour_tfx<18])
            
            #for 18h-2h
            x$cp[x$hour_tfx >=18 | x$hour_tfx<2] <-
              x$cpn[x$hour_tfx >=18 | x$hour_tfx<2]
            x$cp_time[x$hour_tfx >=18 | x$hour_tfx<2] <-
              x$cpn[x$hour_tfx >=18 | x$hour_tfx<2] *
              (x$free_time[x$hour_tfx >=18 | x$hour_tfx<2] -
                 x$total_time[x$hour_tfx >=18 | x$hour_tfx<2])  
          
          
          #summarise the results
          fos <- x %>% #previously named personsstm
            group_by(ID_PESS) %>% 
            summarise_at(c("cp","park","cp_time","park_time"), sum)
          
          colnames(fos)[2:5] <-
            paste(colnames(fos)[2:5],
                  scenario,
                  periods$tmin[i],
                  periods$tmax[i],
                  sep = "_")
          
          access_fos <- access_fos %>% 
            left_join(fos)
          
          rm(x); rm(fos)
        }
      }
      
      rm(free_time)
      
      #4.5.3.3 sum results from the different periods
      access_fos[is.na(access_fos)] <- 0
      
      periods$paste <- paste0(periods$tmin, "_", periods$tmax)
      
      access_fos$cp_base <- rowSums(access_fos[, paste0("cp_base_", periods$paste)])
      access_fos$park_base <- rowSums(access_fos[, paste0("park_base_", periods$paste)])
      access_fos$cp_time_base <- rowSums(access_fos[, paste0("cp_time_base_", periods$paste)])
      access_fos$park_time_base <- rowSums(access_fos[, paste0("park_time_base_", periods$paste)])
      access_fos$cp_prop <- rowSums(access_fos[, paste0("cp_prop_", periods$paste)])
      access_fos$park_prop <- rowSums(access_fos[, paste0("park_prop_", periods$paste)])
      access_fos$cp_time_prop <- rowSums(access_fos[, paste0("cp_time_prop_", periods$paste)])
      access_fos$park_time_prop <- rowSums(access_fos[, paste0("park_time_prop_", periods$paste)])
      
      
      colnames(access_fos)[2:ncol(access_fos)] <-
        paste0("fos_", colnames(access_fos)[2:ncol(access_fos)])
      
      access_fos <- access_fos %>% 
        select(ID_PESS,
               fos_cp_base, fos_park_base, fos_cp.time_base = fos_cp_time_base, fos_park.time_base = fos_park_time_base,
               fos_cp_prop, fos_park_prop, fos_cp.time_prop = fos_cp_time_prop, fos_park.time_prop = fos_park_time_prop)
      access_fos <- access_fos %>% 
        rename(fos_cp.time_base = fos_cp_time_base, fos_park.time_base = fos_park_time_base,
               fos_cp.time_prop = fos_cp_time_prop, fos_park.time_prop = fos_park_time_prop)
      
      saveRDS(access_fos, "access_fos.rds")
  
  # 4.6 adds access to persons and zones####
    # 4.6.1 adds access to zones
      zones_access <- zones %>% 
        left_join(access_com, by=c("idz"="idz")) %>% 
        left_join(access_bt, by=c("idz"="idz")) %>% 
        left_join(access_fca, by=c("idz"="idz"))
      
      #calculates the difference between proposed and base scenarios
      xx <- zones_access %>% 
        st_drop_geometry() %>% 
        select(idz, ends_with("prop")) %>% #idz included only to check if the order of the zones was the same on the 2 subtracted dfs
        mutate(idz = as.numeric(idz))- 
        zones_access %>% 
        st_drop_geometry() %>% 
        select(idz, ends_with("base")) %>% 
        mutate(idz = as.numeric(idz))
      
      xx$idz <- NULL
      
      colnames(xx) <- paste0(str_sub(colnames(xx),1,-5),"diff")
      
      zones_access <- zones_access %>% 
        cbind(xx)
      
      rm(xx)
      
      #calculates the difference between extended and base scenarios
      xx <- zones_access %>% 
        st_drop_geometry() %>% 
        select(idz, ends_with("exte")) %>% #idz included only to check if the order of the zones was the same on the 2 subtracted dfs
        mutate(idz = as.numeric(idz))- 
        zones_access %>% 
        st_drop_geometry() %>% 
        select(idz, ends_with("base")) %>% 
        select(idz, contains("_P")) %>% # this row is included in comparison to prop-base
        mutate(idz = as.numeric(idz))
      
      xx$idz <- NULL
      
      colnames(xx) <- paste0(str_sub(colnames(xx),1,-5),"difE")
      
      zones_access <- zones_access %>% 
        cbind(xx)
      
      rm(xx)
      
    # 4.6.2 adds access to persons
      persons_access <- persons_access %>% 
        left_join(persons) %>% 
        left_join(access_sa_stm, by=c("ID_PESS"="idp")) %>% 
        left_join(access_fos, by=c("ID_PESS"="ID_PESS"))
      
      #calculates the difference between proposed and base scenarios
      xx <- persons_access %>% 
        st_drop_geometry() %>% 
        select(ID_PESS, ends_with("prop")) %>% #ID_PESS included only to check if the order of the zones was the same on the 2 subtracted dfs
        mutate(ID_PESS = as.numeric(ID_PESS))- 
        persons_access %>% 
        st_drop_geometry() %>% 
        select(ID_PESS, ends_with("base")) %>% 
        mutate(ID_PESS = as.numeric(ID_PESS))
      
      xx$ID_PESS <- NULL
      
      colnames(xx) <- paste0(str_sub(colnames(xx),1,-5),"diff")
      
      persons_access <- persons_access %>% 
        cbind(xx)
      
      rm(xx)
      
      #calculates the difference between extended and base scenarios
      xx <- persons_access %>% 
        st_drop_geometry() %>% 
        select(ID_PESS, ends_with("exte")) %>% #ID_PESS included only to check if the order of the zones was the same on the 2 subtracted dfs
        mutate(ID_PESS = as.numeric(ID_PESS))- 
        persons_access %>% 
        st_drop_geometry() %>% 
        select(ID_PESS, ends_with("base")) %>% 
        select(ID_PESS, contains("_P", ignore.case = F)) %>% # this row is included in comparison to prop-base
        mutate(ID_PESS = as.numeric(ID_PESS))
      
      xx$ID_PESS <- NULL
      
      colnames(xx) <- paste0(str_sub(colnames(xx),1,-5),"difE")
      
      persons_access <- persons_access %>% 
        cbind(xx)
      
      rm(xx)
      
      #4.6.3 add person to zone and zone to person
      #4.6.3.1 zone to person
      persons_access$ZONA <- as.character(persons_access$ZONA)
      
      persons_access <- persons_access %>% 
        left_join(zones_access %>% 
                    st_drop_geometry() %>% 
                    select(idz, starts_with("bt"), starts_with("com"), starts_with("fca")),
                  by=c("ZONA"="idz"))
      
      saveRDS(persons_access, "persons_access.rds")
      
      #4.6.3.2 person to zone, as median
      aux <- persons_access %>% 
        select(ZONA, starts_with("sastm"), starts_with("fos")) %>%
        select(!contains("diff")) %>% 
        select(!contains("difE")) %>% 
        group_by(ZONA) %>% 
        summarise_all(list(~median(., na.rm=T)))
      
      colnames(aux)[colnames(aux)!="ZONA"] <- paste0("median_", colnames(aux)[colnames(aux)!="ZONA"])
      
      zones_access <- zones_access %>% 
        left_join(aux, by=c("idz" = "ZONA"))
      
      rm(aux)
      
      #instead of using the median of the diff, we use the diff of the median
      xx <- zones_access %>% 
        st_drop_geometry() %>% 
        select(idz, ends_with("prop")) %>% #idz included only to check if the order of the zones was the same on the 2 subtracted dfs
        mutate(idz = as.numeric(idz))- 
        zones_access %>% 
        st_drop_geometry() %>% 
        select(idz, ends_with("base")) %>% 
        mutate(idz = as.numeric(idz))
      
      xx$idz <- NULL
      
      colnames(xx) <- paste0(str_sub(colnames(xx),1,-5),"diff")
      
      xx <- xx %>% 
        select(!starts_with("com")) %>% 
        select(!starts_with("bt")) %>% 
        select(!starts_with("fca"))
        
      zones_access <- zones_access %>% 
        cbind(xx)
      
      rm(xx)
      
      # repeats for extended
      xx <- zones_access %>% 
        st_drop_geometry() %>% 
        select(idz, ends_with("exte")) %>% #idz included only to check if the order of the zones was the same on the 2 subtracted dfs
        mutate(idz = as.numeric(idz)) - 
        zones_access %>% 
        st_drop_geometry() %>% 
        select(idz, ends_with("base")) %>% 
        select(idz, contains("_P", ignore.case = F)) %>%
        mutate(idz = as.numeric(idz))
      
      xx$idz <- NULL
      
      colnames(xx) <- paste0(str_sub(colnames(xx),1,-5),"difE")
      
      xx <- xx %>% 
        select(!starts_with("com")) %>% 
        select(!starts_with("bt")) %>% 
        select(!starts_with("fca"))
      
      zones_access <- zones_access %>% 
        cbind(xx)
      
      rm(xx)
      
      saveRDS(zones_access, "zones_access.rds")
      
# 4.7 map measures and map contrasting####
  #all the maps have n_intervals breaks, between zero and the max value
  n_intervals=5 #number of intervals for the maps
      
  # 4.7.1 COM####
    com_scenarios$scenario_print <- as.character(com_scenarios$scenario)
    com_scenarios$scenario_print[com_scenarios$scenario_print=="prop"] <- "diff"
    com_scenarios$scenario_print[com_scenarios$scenario_print=="exte"] <- "difE"
    
    com_scenarios$opportunities_print <- as.character(com_scenarios$opportunities)
    com_scenarios$opportunities_print[com_scenarios$opportunities_print=="P17" | 
                                        com_scenarios$opportunities_print=="P18"] <- "_P"
    com_scenarios$opportunities_print[com_scenarios$opportunities_print=="E17" | 
                                        com_scenarios$opportunities_print=="E18"] <- "_E"
    com_scenarios$opportunities_print[com_scenarios$opportunities_print=="cpd" | 
                                        com_scenarios$opportunities_print=="cpn"] <- "_cp"
    
    for (i in 1:nrow(com_scenarios)) {
      column <- paste("com", com_scenarios$thresholds[i], com_scenarios$opportunities[i],
                      com_scenarios$scenario_print[i], sep = "_")
      
      breaks_results <- breaks_func(df=zones_access, variab=column,
                                    df_scenarios=com_scenarios, i=i,
                                    n_breaks=n_intervals, measure="com")
      zones_access$breaks <- breaks_results$breaks
      
      ggplot() +
        geom_sf(data=zones_access, color="gray", fill="gray") +
        geom_sf(data=zones_access %>% filter(urban==T), aes(fill=breaks))+
        scale_fill_manual(
          values= color_values(com_scenarios$scenario_print[i], length(breaks_results$labels)),
          breaks=rev(breaks_results$breaks_scale),
          drop=F,
          labels=breaks_results$labels
        )+
        ggmap::theme_nothing(legend=TRUE) +
        labs(x="", y="",
             fill=paste0("CUM ",com_scenarios$thresholds[i],"min (", com_scenarios$scenario_print[i], ")")) +
        theme(legend.justification = c(1, 0), legend.position = c(1, 0))+
        xlim(-46.82, -46.35)+
        ylim(-23.8 , -23.38)+
        annotation_scale(location = "bl", width_hint = 0.5) +
        annotation_north_arrow(location = "tr", which_north = "true",
                               style = north_arrow_fancy_orienteering)
      
      ggsave(paste0("figures/p2-",column,".png"), width=5, height = 5, dpi = 300)
    }
    write.table(com_scenarios, "clipboard", sep="\t", row.names=FALSE)
    
  # 4.7.2 BT####
    bt_scenarios$scenario_print <- as.character(bt_scenarios$scenario)
    bt_scenarios$scenario_print[bt_scenarios$scenario_print=="prop"] <- "diff"
    bt_scenarios$scenario_print[bt_scenarios$scenario_print=="exte"] <- "difE"
    bt_scenarios$opportunities_print <- "_P"
    
    n_intervals=5
    
    for (i in 1:nrow(bt_scenarios)) {
      column <- paste("bt", bt_scenarios$opportunities[i], bt_scenarios$scenario_print[i], sep = "_")
      
      breaks_results <- breaks_func(df=zones_access, variab=column,
                                    df_scenarios=bt_scenarios, i=i,
                                    n_breaks=n_intervals, measure="bt")
      
      zones_access$breaks <- breaks_results$breaks
      
      ggplot() +
        geom_sf(data=zones_access, color="gray", fill="gray") +
        geom_sf(data=zones_access %>% filter(urban==T), aes(fill=breaks))+
        scale_fill_manual(
          values= color_values(bt_scenarios$scenario_print[i], length(breaks_results$labels), dir=1),
          breaks=rev(breaks_results$breaks_scale),
          drop=F,
          labels=breaks_results$labels
        )+
        ggmap::theme_nothing(legend=TRUE) +
        labs(x="", y="",
             fill=paste0("BT (", bt_scenarios$scenario_print[i], ")")) +
        theme(legend.justification = c(1, 0), legend.position = c(1, 0))+
        xlim(-46.82, -46.35)+
        ylim(-23.8 , -23.38)+
        annotation_scale(location = "bl", width_hint = 0.5) +
        annotation_north_arrow(location = "tr", which_north = "true",
                               style = north_arrow_fancy_orienteering)
      
      ggsave(paste0("figures/p2-",column,".png"), width=5, height = 5, dpi = 300)
    }
    
    write.table(bt_scenarios, "clipboard", sep="\t", row.names=FALSE)
  
    # 4.7.3 FCA####
    fca_scenarios$scenario_print <- as.character(fca_scenarios$scenario)
    fca_scenarios$scenario_print[fca_scenarios$scenario_print=="prop"] <- "diff"
    fca_scenarios$scenario_print[fca_scenarios$scenario_print=="exte"] <- "difE"
    
    fca_scenarios$opportunities_print <- as.character(fca_scenarios$opportunities)
    fca_scenarios$opportunities_print[fca_scenarios$opportunities_print=="P17" | 
                                        fca_scenarios$opportunities_print=="P18"] <- "_P"
    fca_scenarios$opportunities_print[fca_scenarios$opportunities_print=="E17" | 
                                        fca_scenarios$opportunities_print=="E18"] <- "_E"
    fca_scenarios$opportunities_print[fca_scenarios$opportunities_print=="cpd" | 
                                        fca_scenarios$opportunities_print=="cpn"] <- "_cp"
    
    n_intervals <- 5
    
    for (i in 1:nrow(fca_scenarios)) {
      column <- paste("fca", fca_scenarios$thresholds[i], fca_scenarios$opportunities[i], fca_scenarios$scenario_print[i], sep = "_")
      
      breaks_results <- breaks_func(zones_access, column, fca_scenarios, i,
                                    n_intervals, "fca")
      zones_access$breaks <- breaks_results$breaks
    
        ggplot() +
          geom_sf(data=zones_access, color="gray", fill="gray") +
          geom_sf(data=zones_access %>% filter(urban==T), aes(fill=breaks))+
          scale_fill_manual(
            values= color_values(fca_scenarios$scenario_print[i], length(breaks_results$labels)),
            breaks=rev(breaks_results$breaks_scale),
            drop=F,
            labels=breaks_results$labels
          )+
          ggmap::theme_nothing(legend=TRUE) +
          labs(x="", y="",
               fill=paste0("2FCA ",fca_scenarios$thresholds[i],"min (", fca_scenarios$scenario_print[i], ")")) +
          theme(legend.justification = c(1, 0), legend.position = c(1, 0))+
          xlim(-46.82, -46.35)+
          ylim(-23.8 , -23.38)+
          annotation_scale(location = "bl", width_hint = 0.5) +
          annotation_north_arrow(location = "tr", which_north = "true",
                                 style = north_arrow_fancy_orienteering)
      
      ggsave(paste0("figures/p2-",column,".png"), width=5, height = 5, dpi = 300)
    }
    
    write.table(fca_scenarios, "clipboard", sep="\t", row.names=FALSE)
    
  # 4.7.4 SA_STM####  
    sa_stm_scenarios$scenario_print <- as.character(sa_stm_scenarios$scenario)
    sa_stm_scenarios$scenario_print[sa_stm_scenarios$scenario_print=="prop"] <- "diff"
    sa_stm_scenarios$scenario_print[sa_stm_scenarios$scenario_print=="exte"] <- "difE"
    
    sa_stm_scenarios$opportunities_print <- as.character(sa_stm_scenarios$opportunities)
    sa_stm_scenarios$opportunities_print[sa_stm_scenarios$opportunities_print=="P17" | 
                                           sa_stm_scenarios$opportunities_print=="P18"] <- "_P"
    sa_stm_scenarios$opportunities_print[sa_stm_scenarios$opportunities_print=="E17" | 
                                        sa_stm_scenarios$opportunities_print=="E18"] <- "_E"
    sa_stm_scenarios$opportunities_print[sa_stm_scenarios$opportunities_print=="cpd" | 
                                           sa_stm_scenarios$opportunities_print=="cpn"] <- "_cp"
    
    sa_stm_scenarios$time_marker <- "count" # saves count if the opportunity doesn't measure time or "time" if it does
    
    #x is a copy, but changing opportunities and time_marker
    x <- sa_stm_scenarios
    x$opportunities <- paste0(x$opportunities,".time")
    
    x$time_marker <- "time"
    
    sa_stm_scenarios <- sa_stm_scenarios %>% 
      rbind(x)
    
    n_intervals = 5
    
    for (i in 1:nrow(sa_stm_scenarios)) {
      column <- paste("median_sastm", sa_stm_scenarios$opportunities[i], sa_stm_scenarios$thresholds[i],
                      sa_stm_scenarios$times[i], sa_stm_scenarios$scenario_print[i], sep = "_")
      
      #function is not working here for some reason. the code below is the copy of the function
      n_intervals = 5
      
      df=zones_access; variab=column;
      df_scenarios=sa_stm_scenarios; i=i;
      n_breaks=n_intervals; measure="sastm"
      
      if ((df_scenarios$scenario[i]=="base") & (measure!="fos")) {
        x <- df %>%
          st_drop_geometry() %>%
          filter(urban==T) %>%
          select(starts_with(paste0(ifelse(measure %in% c("sastm", "fos"),"median_" ,"" ),
                                    measure))) %>% 
          select(ends_with(df_scenarios$scenario_print[i]))
        
        if (measure %in% c("com","fca", "sastm")) {
          x <- x %>% 
            select(contains(as.character(df_scenarios$thresholds[i]))) %>% 
            select(contains(df_scenarios$opportunities_print[i]))
        }
        
        if (measure=="sastm") {
          if (sa_stm_scenarios$time_marker[i]=="time") {
            x <- x %>% select(contains("time"))
          }else {
            x <- x %>% select(names(x), -contains("time"))
          }
        }
        
        x <- gather(x)
        x <- x$value
        
      } else if ((measure!="fos") &
                 (df_scenarios$opportunities_print[i] %in% c("_P", "_E"))) {
        x <- df %>%
          st_drop_geometry() %>%
          filter(urban==T) %>%
          select(starts_with(paste0(ifelse(measure %in% c("sastm", "fos"),"median_" ,"" ),
                                    measure))) %>% 
          select(contains("diff"), contains("difE")) %>% 
          select(contains(as.character(df_scenarios$times[i])))
        
        if (measure %in% c("com","fca", "sastm")) {
          x <- x %>% 
            select(contains(as.character(df_scenarios$thresholds[i])))
        }
        
        if (measure=="sastm") {
          if (sa_stm_scenarios$time_marker[i]=="time") {
            x <- x %>% select(contains("time"))
          }else {
            x <- x %>% select(names(x), -contains("time"))
          }
        }
        
        x <- gather(x)
        x <- x$value
        
      } else {
        x <- (df %>% st_drop_geometry() %>% filter(urban==T))[, variab]
      }
      
      x[is.infinite(x)] <- 500
      breaks <- unique(round_mag(getJenksBreaks(x, n_breaks+1),1))
      
      n_intervals <- length(breaks)-1
      
      if (sum(x^2, na.rm = T)==0) {
        next
      }
      
      df$breaks <- cut((df %>% st_drop_geometry())[, variab],
                       breaks=breaks, include.lowest = TRUE, labels=breaks[2:(n_intervals+1)])
      df$breaks[is.na(df$breaks)] <- levels(df$breaks)[1]
      
      breaks_scale <- breaks[2:(n_intervals+1)]
      labels <- rev(paste0(breaks[2:(n_intervals+1)]," - ", breaks[1 :(n_intervals) ]))
      
      breaks_results <- list("breaks" = df$breaks, "breaks_scale" = breaks_scale, "labels" = labels)
      #end of copied function
      
      rm(df); rm(variab); rm(df_scenarios); rm(n_breaks); rm(measure)
      
      zones_access$breaks <- breaks_results$breaks
      
      ggplot() +
        geom_sf(data=zones_access, color="gray", fill="gray") +
        geom_sf(data=zones_access %>% filter(urban==T), aes(fill=breaks))+
        scale_fill_manual(
          values= color_values(sa_stm_scenarios$scenario_print[i], length(breaks_results$labels)),
          breaks=rev(breaks_results$breaks_scale),
          drop=F,
          labels=breaks_results$labels
        )+
        ggmap::theme_nothing(legend=TRUE) +
        labs(x="", y="",
             fill=paste0("SA_STM ",sa_stm_scenarios$thresholds[i],"min (", sa_stm_scenarios$scenario_print[i], ")")) +
        theme(legend.justification = c(1, 0), legend.position = c(1, 0))+
        xlim(-46.82, -46.35)+
        ylim(-23.8 , -23.38)+
        annotation_scale(location = "bl", width_hint = 0.5) +
        annotation_north_arrow(location = "tr", which_north = "true",
                               style = north_arrow_fancy_orienteering)
      
      ggsave(paste0("figures/p2-",column,".png"), width=5, height = 5, dpi = 300)
    }
    
    write.table(sa_stm_scenarios, "clipboard", sep="\t", row.names=FALSE)
    
  
  #4.7.5 STM-NFOS####
    fos_scenarios <- expand.grid(opportunities = c("cp", "park","cp.time", "park.time"),
                                 scenario = c("base", "diff"),
                                 stringsAsFactors = F)
    
    fos_scenarios$scenario_print <- fos_scenarios$scenario
    fos_scenarios$opportunities_print <- fos_scenarios$opportunities
    
    n_intervals <- 5
    
    for (i in c(1:4,6,8)) {#1:nrow(fos_scenarios) not all the scenarios have changes
      column <- paste("median_fos", fos_scenarios$opportunities[i], fos_scenarios$scenario_print[i], sep = "_")
      
      breaks_results <- breaks_func(zones_access, column, fos_scenarios, i,
                                    n_intervals, "fos")
      
      zones_access$breaks <- breaks_results$breaks
      
      ggplot() +
        geom_sf(data=zones_access, color="gray", fill="gray") +
        geom_sf(data=zones_access %>% filter(urban==T), aes(fill=breaks))+
        scale_fill_manual(
          values= color_values(fos_scenarios$scenario_print[i], length(breaks_results$labels)),
          breaks=rev(breaks_results$breaks_scale),
          drop=F,
          labels=breaks_results$labels
        )+
        ggmap::theme_nothing(legend=TRUE) +
        labs(x="", y="",
             fill=paste0("nFOS (", fos_scenarios$scenario_print[i], ")")) +
        theme(legend.justification = c(1, 0), legend.position = c(1, 0))+
        xlim(-46.82, -46.35)+
        ylim(-23.8 , -23.38)+
        annotation_scale(location = "bl", width_hint = 0.5) +
        annotation_north_arrow(location = "tr", which_north = "true",
                               style = north_arrow_fancy_orienteering)
      
      ggsave(paste0("figures/p2-",column,".png"), width=5, height = 5, dpi = 300)
    }
    
    write.table(fos_scenarios, "clipboard", sep="\t", row.names=FALSE)

  #4.8 distribution of stms####
    scenarios <- bt_scenarios %>% 
      mutate(measure = "bt",
             opportunities_print = "-",
             thresholds = -1,
             time_marker = "-") %>% 
      rbind(com_scenarios %>% 
              mutate(measure = "com",
                     time_marker = "-"))%>% 
      rbind(fca_scenarios %>% 
              mutate(measure = "fca",
                     time_marker = "-"))%>% 
      rbind(sa_stm_scenarios %>% 
              mutate(measure = "sa_stm"))%>% 
      rbind(fos_scenarios %>% 
              mutate(measure = "fos",
                     thresholds = -1,
                     time_marker = "-",
                     times = -1))
    
    scenarios$scenario[scenarios$measure=="fos" & scenarios$scenario=="diff"] <- "prop"
    scenarios$time_marker[scenarios$measure=="fos" &
                            (scenarios$opportunities %in% c("cp.time", "park.time"))] <- 
      "time"
    
    #add column measure_title, with the same format of persons_access
    scenarios$measure_title <- "x"
    scenarios$measure_title[scenarios$measure=="com"] <-
      paste("com", scenarios$thresholds[scenarios$measure=="com"],
            scenarios$opportunities[scenarios$measure=="com"],
            scenarios$scenario[scenarios$measure=="com"], sep = "_")
    
    scenarios$measure_title[scenarios$measure=="bt"] <-
      paste("bt", scenarios$opportunities[scenarios$measure=="bt"],
            scenarios$scenario[scenarios$measure=="bt"], sep = "_")
    
    scenarios$measure_title[scenarios$measure=="fca"] <-
      paste("fca", scenarios$thresholds[scenarios$measure=="fca"],
            scenarios$opportunities[scenarios$measure=="fca"],
            scenarios$scenario[scenarios$measure=="fca"], sep = "_")
    
    scenarios$measure_title[scenarios$measure=="sa_stm"] <-
      paste("sastm", scenarios$opportunities[scenarios$measure=="sa_stm"],
            scenarios$thresholds[scenarios$measure=="sa_stm"],
            scenarios$times[scenarios$measure=="sa_stm"],
            scenarios$scenario[scenarios$measure=="sa_stm"], sep = "_")
    
    scenarios$measure_title[scenarios$measure=="fos"] <-
      paste("fos",
            str_replace(scenarios$opportunities[scenarios$measure=="fos"], "_count", "") ,
            scenarios$scenario[scenarios$measure=="fos"],
            sep = "_")
    
    x <- scenarios
    x$opportunities[x$opportunities=="P17"] <- "park"
    x$opportunities[x$opportunities=="E17"] <- "park"
    x$opportunities[x$opportunities=="P18"] <- "park"
    x$opportunities[x$opportunities=="E18"] <- "park"
    x$opportunities[x$opportunities=="cpd"] <- "cp"
    x$opportunities[x$opportunities=="cpn"] <- "cp"
    x$opportunities[x$opportunities=="P17.time"] <- "park.time"
    x$opportunities[x$opportunities=="E17.time"] <- "park.time"
    x$opportunities[x$opportunities=="P18.time"] <- "park.time"
    x$opportunities[x$opportunities=="E18.time"] <- "park.time"
    x$opportunities[x$opportunities=="cpd.time"] <- "cp.time"
    x$opportunities[x$opportunities=="cpn.time"] <- "cp.time"
    
    x <- x %>% 
      select(measure, opportunities, times, scenario, thresholds) %>% 
      spread(thresholds, thresholds) %>% 
      unite("thresholds",`-1`:`120`, na.rm = T, sep=", ") %>% 
      spread(scenario, thresholds)
    
    write.table(x, "clipboard", sep="\t", row.names=FALSE)
    
    # 4.8.1 share of zeros####
    scenarios$share_zeros <- 0
    
    for (i in 1:nrow(scenarios)) {
      scenarios$share_zeros[i] <- sum(persons_access[,scenarios$measure_title[i]]==0,na.rm = T)/
        nrow(persons_access)
    }
    
    x <- scenarios %>% 
      filter(scenario=="base", measure %in% c("sa_stm", "fos"), time_marker %in% c("-", "count")) %>% 
      select(measure, opportunities, thresholds, share_zeros)
    
    write.table(x, "clipboard", sep="\t", row.names=FALSE)
    
  # 4.9 correlation matrices of changes####
    # 4.9.1 L15 park
      my_data <- persons_access %>%
        select(ends_with("diff")) %>% 
        select(contains("_P")) %>% 
        select(!starts_with("bt"))
      
      res2 <- rcorr(as.matrix(my_data))
      
      jpeg(file="figures/p2-corr_matrix-park_diff.jpeg", width = 800, height = 800)
      
      corrplot(res2$r, type="upper", #order="hclust", 
               p.mat = res2$P, sig.level = 0.01, insig = "blank",
               tl.pos = "n")
      
      dev.off()
      
      # 4.9.2 park extended hours
      my_data <- persons_access %>%
        select(ends_with("difE")) %>% 
        select(contains("_E")) %>% 
        select(!starts_with("bt"))
      
      res2 <- rcorr(as.matrix(my_data))
      
      jpeg(file="figures/p2-corr_matrix-park_difE.jpeg", width = 800, height = 800)
      
      corrplot(res2$r, type="upper", #order="hclust", 
               p.mat = res2$P, sig.level = 0.01, insig = "blank",
               tl.pos = "n")
      
      dev.off()
      
      # 4.9.3 cult 
      my_data <- persons_access %>%
        select(ends_with("diff")) %>% 
        select(contains("_cp")) %>% 
        select(!starts_with("bt"))
      
      res2 <- rcorr(as.matrix(my_data))
      
      jpeg(file="figures/p2-corr_matrix-cp_diff.jpeg", width = 800, height = 800)
      
      corrplot(res2$r, type="upper", #order="hclust", 
               p.mat = res2$P, sig.level = 0.01, insig = "blank",
               tl.pos = "n")
      
      dev.off()
      
# 5 EQUITY ANALYSES####
  # 5.1 Lorenz####
    # 5.1.1 gini indexes and palma ratios####
    persons_access <- persons_access %>% #getting only NomeDistri
      left_join(zones %>% 
                  st_drop_geometry() %>% 
                  select(idz, NomeDistri),
                by=c("ZONA"="idz"))
    
    persons_access$selec <- (persons_access$NomeDistri %in% selec_zones)
      
    for (i in 1:nrow(scenarios)) {
      scenarios$gini_all[i] <- gini(persons_access[ , scenarios$measure_title[i]],
                                    weights=persons_access$FE_PESS)
      scenarios$gini_selec[i] <- gini(persons_access[ persons_access$selec==T,
                                                      scenarios$measure_title[i]],
                                      weights=persons_access[persons_access$selec==T, "FE_PESS"])
      scenarios$palma_all[i] <- palma_ratio(persons_access[ , scenarios$measure_title[i]],
                                            persons_access$RENDA_FA,
                                            persons_access$FE_PESS)
      scenarios$palma_selec[i] <- palma_ratio(persons_access[ persons_access$selec==T,scenarios$measure_title[i]],
                                              persons_access[persons_access$selec==T, "RENDA_FA"],
                                              persons_access[persons_access$selec==T, "FE_PESS"])
    }
    
    scenarios$opportunities <- str_replace(scenarios$opportunities,"E","P")

    x <- scenarios %>% 
      select(measure, opportunities, scenario, thresholds, gini_all:palma_selec) %>% 
      gather(variable_ineq, value, -(measure:thresholds)) %>% 
      unite(temp, scenario, variable_ineq) %>% 
      spread(temp, value)
    
    write.table(x, "clipboard", sep="\t", row.names=FALSE)
    
    #chart with facets and no thresholds
    x <- scenarios %>% 
      filter(time_marker %in% c("-", "_", "count")) %>% 
      filter((measure %in% c("com","fca") & thresholds==60) |
               (measure %in% c("sa_stm") & thresholds==90) |
               (measure %in% c("bt","fos"))) %>% 
      select(measure, opportunities, scenario, times, thresholds, gini_all:palma_selec) %>% 
      gather(variable_ineq, value, -(measure:thresholds))
    
    x$Opportunity <- str_sub(x$opportunities, 1,1)
    x$Opportunity[x$Opportunity %in% c("p", "P")] <- "Park"
    x$Opportunity[x$Opportunity =="c"] <- "Culture"
    
    x <- x %>% 
      separate(col=variable_ineq, into = c("variable","area"))
    
      #change time in fos for 17 and 18, just for a better chart
    x2 <- x %>% 
      filter(measure=="fos")
    x2$times <- 17
    x3 <- x %>% 
      filter(measure=="fos")
    x3$times <- 18
    
    x <- x %>% 
      filter(measure!="fos") %>% 
      rbind(x2) %>% 
      rbind(x3)
    
    ggplot(x)+
      geom_point(aes(x=area, y=value, color= scenario), size=2, alpha=.3)+
      facet_grid(rows=vars(variable, times), cols=vars(Opportunity, str_to_upper(measure)), scales="free_y")+
      scale_y_continuous(name="Inequality")+
      scale_x_discrete(name="Area")+
      theme_light()
    
    ggsave("figures/p2-facet-no_thresholds.png")
    
    rm(x)
    
    #chart with facets and thresholds
    x <- scenarios %>% 
      filter(time_marker %in% c("-", "_", "count")) %>% 
      select(measure, opportunities, scenario, times, thresholds, gini_all:palma_selec) %>% 
      gather(variable_ineq, value, -(measure:thresholds))
    
    x$Opportunity <- str_sub(x$opportunities, 1,1)
    x$Opportunity[x$Opportunity %in% c("p", "P")] <- "Park"
    x$Opportunity[x$Opportunity =="c"] <- "Culture"
    
    x <- x %>%
      filter(variable_ineq %in% c("palma_all", "palma_selec"), thresholds>0)
    
    ggplot(x)+
      geom_line(aes(x=thresholds, y=value, color= scenario), size=.5, alpha=.3)+
      geom_point(aes(x=thresholds, y=value, color= scenario), size=2, alpha=.3)+
      facet_grid(rows=vars(variable_ineq, times), cols=vars(Opportunity, str_to_upper(measure)), scales="free_x")+
      scale_y_continuous(name="Pseudo-Palma ratio")+
      scale_x_continuous(name="Thresholds")+
      theme_light()
    
    ggsave("figures/p2-facet-palma.png")
    
    rm(x)
    
    scenarios$selec <- F
    
    scenarios$selec <- (scenarios$measure=="com" & scenarios$thresholds==90) |
      (scenarios$measure=="fca" & scenarios$thresholds==90) |
      (scenarios$measure=="sa_stm" & (scenarios$thresholds == 120) & scenarios$time_marker=="count") |
      (scenarios$measure=="fos" & scenarios$time_marker=="-")
    
    scen_aux <- scenarios %>%
      filter(selec==T, scenario_print %in% c("diff", "difE"))
    
    rm(x)
  
#6 BASIC MAPS####
  # 6.1 population - from census####
  census_table <- read_csv2("Basico_SP1.csv")
  
  census_table <- census_table %>% 
    select(Cod_setor, V002, V005) %>% #V002 - Moradores em domicílios particulares  permanentes ou população residente em domicílios particulares permanentes
                                      #V005 - Valor do rendimento nominal médio mensal das pessoas responsáveis por domicílios particulares permanentes (com e sem rendimento)
    rename(population = V002, avg_income = V005)
  
  census_table$Cod_setor <- as.character(census_table$Cod_setor)
  
  census <- census %>% 
    left_join(census_table, by=c("code_tract" = "Cod_setor"))
  
  census$pop_dens <- replace_na(census$population,0)/st_area(census)
  
  ggplot()+
    geom_sf(data=census, aes(fill=rank(pop_dens)), color=NA)+
    scale_fill_viridis_c(breaks = quantile(rank(census$pop_dens)),
                         labels = round(quantile(census$pop_dens),3))+
    ggmap::theme_nothing(legend=TRUE) +
    labs(x="", y="", fill="population density\n[m^-2]") +
    theme(legend.justification = c(1, 0), legend.position = c(1, 0.1))+
    annotation_scale(location = "br", width_hint = 0.5) +
    annotation_north_arrow(location = "tr", which_north = "true",
                           style = north_arrow_fancy_orienteering)
  
  ggsave("figures/p2-pop_dens.png")
  
  # 6.2 jobs - from OD####
  #each person might have 0, 1, or 2 jobs and each person has an expansion factor (FE_PESS). so if the person has a job, we expand it using 
  #FE_PESS. if she has 2, than we use half FE_PESS for each
  persons_aux <- OD_SP %>% 
                    select(ID_PESS, ZONATRA1, ZONATRA2, FE_PESS)%>% 
                    group_by(ID_PESS, ZONATRA1, ZONATRA2, FE_PESS) %>% 
                    summarise() %>% 
                    ungroup()
  
  persons_aux$n_jobs <- if_else(is.na(persons_aux$ZONATRA1),0,1) +
    if_else(is.na(persons_aux$ZONATRA2), 0, 1)
  
  persons_aux$EF_job1 <- persons_aux$FE_PESS / persons_aux$n_jobs #attributing an expansion factor to cases with no jobs or only one will not be a problem because the zones will be grouped at NA and descarded
  persons_aux$EF_job2 <- persons_aux$FE_PESS / persons_aux$n_jobs
  
  jobs <- persons_aux %>% 
    group_by(ZONATRA1) %>% 
    summarise(job_EF = sum(EF_job1, na.rm = T)) %>% 
    rename(zone = ZONATRA1)
  
  jobs <- jobs %>% 
    rbind(persons_aux %>% 
            group_by(ZONATRA2) %>% 
            summarise(job_EF = sum(EF_job2, na.rm = T)) %>% 
            rename(zone = ZONATRA2))
  
  jobs <- jobs %>% 
    group_by(zone) %>% 
    summarise(job_EF = sum(job_EF, na.rm = T))
  
  jobs$zone <- as.character(jobs$zone)
  
  zones <- zones %>% 
    left_join(jobs, by=c("idz"="zone"))
  
  zones$job_dens <- replace_na(zones$job_EF,0)/zones$Area_ha_2
  
  ggplot()+
    geom_sf(data=zones, aes(fill=rank(job_dens)), color=NA)+
    scale_fill_viridis_c(breaks = quantile(rank(zones$job_dens)),
                         labels = round(quantile(zones$job_dens),3))+
    ggmap::theme_nothing(legend=TRUE) +
    labs(x="", y="", fill="jobs density\n[ha^-1]") +
    theme(legend.justification = c(1, 0), legend.position = c(1, 0.1))+
    annotation_scale(location = "br", width_hint = 0.5) +
    annotation_north_arrow(location = "tr", which_north = "true",
                           style = north_arrow_fancy_orienteering)
  
  ggsave("figures/p2-job_dens.png")
  
  # 6.3 income - from census####
  ggplot()+
    geom_sf(data=zones, color="gray", fill="gray")+
    geom_sf(data=census %>% filter(!is.na(avg_income)), aes(fill=rank(avg_income)), color=NA)+
    scale_fill_viridis_c(breaks = quantile(rank( (census %>% filter(!is.na(avg_income)))$avg_income )),
                         labels = round(quantile( (census %>% filter(!is.na(avg_income)))$avg_income ),3))+
    ggmap::theme_nothing(legend=TRUE) +
    labs(x="", y="", fill="average monthly\nincome [R$]") +
    theme(legend.justification = c(1, 0), legend.position = c(1, 0.1))+
    annotation_scale(location = "br", width_hint = 0.5) +
    annotation_north_arrow(location = "tr", which_north = "true",
                           style = north_arrow_fancy_orienteering)
  
  ggsave("figures/p2-income.png")
  
  # 6.4 parks and line 15####
  zones_selec <- st_read("layers/zones_selec_dissolved.shp")
  line15_base <- st_read("layers/L15_base.shp")
  line15_prop <- st_read("layers/L15_prop.shp")
  rail <- st_read("layers/rail_base.shp") %>% 
    st_crop(xmin = -46.85, xmax = -46.35,
            ymin = -24, ymax = -23.35)
  
  ggplot()+
    geom_sf(data=zones, aes(fill="rural"),color=NA)+
    geom_sf(data=census %>% filter(zone=="URBANO"), aes(fill="urban"),color=NA)+
    geom_sf(data=parks, aes(fill="park"), color= "#61c28f" )+
    scale_fill_manual(
      values = c("rural" = "#d6f5d1", "urban" = "#d1d8d5", "park"= "#61c28f"), 
      labels = c("rural"= "Rural areas", "urban"= "Urban areas", "park"= "Parks"),
      name = ""
    )+
    geom_sf(data=rail, aes(color="r"))+
    geom_sf(data=line15_prop, aes(color="mp"), size=1.5)+
    geom_sf(data=line15_base, aes(color="mb"), size =1.2)+
    geom_sf(data=zones_selec, aes(color="selec"), fill=NA)+
    scale_color_manual(
      values = c("selec" = "purple", "r" = "black", "mp" = "#5470df", "mb"= "#d18262"), 
      labels = c("selec"= "Selected zones", "r"= "Rail", "mp"= "L15 expanded", "mb"= "L15 base"),
      name = ""
    )+
    ggmap::theme_nothing(legend=TRUE) +
    labs(x="", y="") +
    theme(legend.justification = c(1, 0), legend.position = c(1, 0.1))+
    annotation_scale(location = "br", width_hint = 0.5) +
    annotation_north_arrow(location = "tr", which_north = "true",
                           style = north_arrow_fancy_orienteering)
  
  ggsave("figures/p2-map-parks_L15.png")
  
  #parks closing hours
  parks_centr <- parks %>% 
    group_by(pde5_nome, Fechamento) %>% 
    summarise(area=sum(area_p,na.rm = T))
  
  parks_centr$centroids <- st_transform(parks_centr, 29101) %>% 
    st_centroid() %>% 
    st_transform(., '+proj=longlat +ellps=GRS80 +no_defs') %>%
    st_geometry()
  
  points <- parks_centr %>% 
    dplyr::select(pde5_nome) %>%
    st_transform(4326) %>%
    st_centroid()
  
  points <- points %>%
    sfc_as_cols(names = c("long", "lat"))
  points <- points %>%
    st_set_geometry(NULL) %>%
    as.data.frame()
  parks_centr <- parks_centr %>%
    left_join(points, by = "pde5_nome") %>% 
    st_drop_geometry()
  
  parks_centr <- parks_centr %>%
    select(-centroids)
  
  rm(points)
  
  parks_centr$name <- str_to_title(parks_centr$pde5_nome)
  parks_centr$display <- ((parks_centr$area>1.5e6) | (parks_centr$name %in% c("Ibirapuera","Villa Lobos"))) &
    (parks_centr$lat>-23.75)
    
  ggplot()+
    geom_sf(data=zones, fill="#d6f5d1",color=NA)+
    geom_sf(data=census %>% filter(zone=="URBANO"), fill="#d1d8d5", color=NA)+
    geom_sf(data=rail, color="black")+
    geom_label_repel(data=parks_centr %>% filter(display==T),
                    aes(x = long, y = lat, label = name),
                    nudge_x = c(0.1), nudge_y = c(.01))+
    geom_point(data=parks_centr,
               aes(x=long, y=lat, color=Fechamento, size=area))+
    scale_color_viridis_b(option = 'plasma')+
    scale_size_continuous(range = c(0,25))+
    ggmap::theme_nothing(legend=TRUE) +
    labs(x="", y="", color="Closing hour", size="Area [m2]") +
    xlim(-46.82, -46.35)+
    ylim(-23.8 , -23.38)+
    theme(legend.justification = c(0, 0), legend.position = c(-0.25, 0))+
    annotation_scale(location = "bl", width_hint = 0.5) +
    annotation_north_arrow(location = "tr", which_north = "true",
                           style = north_arrow_fancy_orienteering)
  
  ggsave("figures/p2-map-parks-hours.png")
  
  # 6.5 cultural places density####
  ggplot() +
    geom_sf(data=zones, color="gray", fill="gray") +
    geom_sf(data=zones %>% filter(urban==T), aes(fill=cpd))+
    scale_fill_viridis_b()+
    ggmap::theme_nothing(legend=TRUE) +
    labs(x="", y="",
         fill="Number of cultural places") +
    theme(legend.justification = c(1, 0), legend.position = c(1, 0))+
    xlim(-46.82, -46.35)+
    ylim(-23.8 , -23.38)+
    annotation_scale(location = "bl", width_hint = 0.5) +
    annotation_north_arrow(location = "tr", which_north = "true",
                           style = north_arrow_fancy_orienteering)
  
  ggsave("figures/p2-map-cpd-density.png")
  
  ggplot() +
    geom_sf(data=zones, color="gray", fill="gray") +
    geom_sf(data=zones %>% filter(urban==T), aes(fill=cpn))+
    scale_fill_viridis_b()+
    ggmap::theme_nothing(legend=TRUE) +
    labs(x="", y="",
         fill="Number of cultural places") +
    theme(legend.justification = c(1, 0), legend.position = c(1, 0))+
    xlim(-46.82, -46.35)+
    ylim(-23.8 , -23.38)+
    annotation_scale(location = "bl", width_hint = 0.5) +
    annotation_north_arrow(location = "tr", which_north = "true",
                           style = north_arrow_fancy_orienteering)
  
  ggsave("figures/p2-map-cpn-density.png")
  
  # 6.6 sample with income####
  persons_geo <- (OD_SP %>% 
                    select(ID_PESS, CO_DOM_X, CO_DOM_Y)%>% 
                    group_by(ID_PESS, CO_DOM_X, CO_DOM_Y) %>% 
                    summarise() %>% 
                    ungroup() %>% 
                    st_as_sf(coords = c("CO_DOM_X", "CO_DOM_Y"),
                             crs = 22523)) %>% 
    left_join(persons)
  
  ggplot()+
    geom_sf(data=zones, color="gray", fill="gray")+
    geom_sf(data=persons_geo, aes(color=rank(RENDA_FA)), size = 0.1)+
    scale_color_viridis_c(breaks = quantile(rank( persons_geo$RENDA_FA )),
                         labels = round(quantile( persons_geo$RENDA_FA ),3),
                         name="Household monthly\nincome (R$)")+
    geom_sf(data=rail, color="black")+
    ggmap::theme_nothing(legend=TRUE) +
    labs(x="", y="", fill="jobs density (ha^-1)") +
    theme(legend.justification = c(1, 0), legend.position = c(1, 0.1))+
    annotation_scale(location = "br", width_hint = 0.5) +
    annotation_north_arrow(location = "tr", which_north = "true",
                           style = north_arrow_fancy_orienteering)

  ggsave("figures/p2-map-income_OD.png")
  
# 7 shared results####
  write.csv(persons_access,
            "results/persons_access.csv")
  
  write.csv(zones_access %>% 
              st_drop_geometry(),
            "results/zones_access.csv")
  