#####################################################################################################
### Script of the manuscript:																					                            ###
### Marin-Diaz B, et al - How grazing management can maximize erosion resistance of salt marshes  ###
### Date created: 17/09/19 Groningen                                                              ###
### Last modified: 18/01/21                                                                       ###
#####################################################################################################

#Table of contents:
## A. Calculate erosion rates
## B. Analysis of soil stability with all the samples (including samples with sand layers)
### B.1. What affects the clay depth?
### B.2. Changes in soil elevation inside and outside exclosures
## C. Analysis of the erosion of the clay layer (excluding samples with sand layers)
### C.1. Test significant differences on clay erosion among treatments - LARGE GRAZERS
### C.2. Test significant differences on clay erosion among treatments - SMALL GRAZERS
## D. Which variables are correlated to the clay layer erosion?
### D.1. Analysis clay erosion only large herbivores, without outlier 6hin3
### D.2. Analysis clay erosion only small herbivores, without outlier 4LO3

    # Start from a clean workspace: remove every object, hidden ones included.
    # `all.names` is spelled out instead of relying on partial matching of `all`.
    rm(list = ls(all.names = TRUE))

    # Set working directory (the folder that holds the input .csv files):
    inputDirectory1 <- 'C:/'
    setwd(inputDirectory1)
    
    # Load required packages:
    library(lattice)
    #install.packages("ggcorrplot")
    library("ggcorrplot")
    library("PerformanceAnalytics") #for chart.Correlation
    # install.packages("dplyr")
    library(dplyr)#calculate means, sd etc
    library(lme4)#package for linear mixed model
    #for the VIF coefficients (multicollinearity)
    #install.packages("car")
    #install.packages("openxlsx")
    #install.packages("rlang")
    library(rlang)
    library(car)
    library(openxlsx)
    library(multcomp)# for function glht (post hoc for LMM)
    #install.packages("multcompView") #for plots with tukey post hoc labels
    library(multcompView)
    library(ggpubr)#for ggplots
    #install.packages("dplyr")
    library(plotrix)#to calculate standard error with dplyr

#############
#   A. Calculate erosion rates
#############
  dataerosion <- read.csv("datavolumeloss.csv")

  # Organize data for analysis: melt the wide table (one column per time step)
  # into long format with one row per sample (Code) and time step.
  library(reshape2)
  head(dataerosion)
  # NOTE: the table just read is `dataerosion`; no object called `data` exists
  # yet at this point of the script (it is only created in section C).
  data.melt <- reshape2::melt(dataerosion, id = c("Code"))
  head(data.melt)
  data.melt$time <- data.melt$variable
  data.melt$erosion <- data.melt$value
  data.melt$value <- NULL
  data.melt$variable <- NULL
  # Extract the time in h from the variable name: characters 2 to 5 of the
  # column name are kept, i.e. the first character is dropped.
  data.melt$time <- substr(as.character(data.melt$time), 2, 5)
  head(data.melt)

  ###CALCULATION EROSION VALUES with Michaelis-Menten function
    ##theoretical example
    library(drc) # for fitting the Michaelis-Menten curve, install package when needed
    x <- c(0, 1, 2, 3, 4, 6)
    y <- c(0, 5, 6.5, 7, 7, 7)
    plot(y ~ x)
    d <- data.frame(x, y)
    m1 <- drc::drm(y ~ x, data = d, fct = MM.2())
    summary(m1)
    plot(m1, log = "")

    ##selecting our data
    d_sel <- subset(data.melt, Code == "6HIM2") #change Code for every sample
    # Convert time to numeric BEFORE plotting/fitting: substr() returns
    # character, which cannot be used as a numeric x axis.
    d_sel$time <- as.numeric(d_sel$time)
    with(d_sel, plot(erosion ~ time, pch = 16))
    str(d_sel)
    ###values "d"(possible maximum volume loss over time) and "e"(time when half of the total volume loss is lost) copied in the table of variables
    # Two-parameter Michaelis-Menten fit on the rows without missing values.
    m1 <- drc::drm(erosion ~ time, data = d_sel[complete.cases(d_sel), ], fct = MM.2())
    summary(m1)
    plot(m1, log = "")

#############
#   B. Analysis of soil stability with all the samples (including samples with sand layer)
#############
    # Load the table of sample variables together with the erosion parameters
    # obtained in section A: e (e_rate) and d (d_max_lost).
    data_sand <- read.csv("datavariablessand.csv")
    # All the high marsh samples from the area grazed by small herbivores have a sand layer below the clay layer. They collapse before 2h of wave exposure.
    # Therefore, considering all the samples, the main driver of soil stability is the clay depth.

###
#   B.1. what affects the clay layer depth? Analysis of small and large grazers separately
###

    # Large grazers = rows 61 to 78 of the table. Result MS #1
    large <- data_sand[61:78, ]
    # Two-way ANOVA on log(clay depth): grazing x elevation, with interaction
    mod21 <- aov(log(claydepth) ~ grazing * elev, data = large)
    summary(mod21)    # author's note: treatment is significant, elevation and time are not significant
    summary.lm(mod21)
    # Residual diagnostics
    shapiro.test(resid(mod21))
    hist(resid(mod21))
    # Tukey post hoc comparisons
    tk21 <- TukeyHSD(mod21)
    tk21
    #clay is deeper in low marsh and not grazed or artificial mowed
    
    #select small grazers data (rows 1 to 60 of the table)
    small <- data_sand[1:60, ]
    small$age <- factor(small$age)

    # Split by elevation up front: both subsets are needed by the mixed models
    # AND by the one-way ANOVAs below, so they must exist before the first model.
    smallhigh <- small[which(small$elev == "high"), ]
    smalllow <- small[which(small$elev == "low"), ]

    #Linear mixed model (LMM) to test the effect of grazing in clay depth in the high marsh:  MS #2
    modLMM1 <- lmer(claydepth ~ grazing + (1 | age), smallhigh) #"grazing" refers to grazed/ not grazed, we include age as random factor
    summary(modLMM1)
    shapiro.test(resid(modLMM1))
    plot(modLMM1)
    summary(glht(modLMM1, linfct = mcp(grazing = "Tukey")))
    Anova(modLMM1) #effect of grazing


    #Linear mixed model (LMM) to test the effect of grazing in clay depth in the low marsh:  MS #2
    # (modLMM1 is deliberately overwritten, as in the rest of the script)
    modLMM1 <- lmer(claydepth ~ grazing + (1 | age), smalllow) #"grazing" refers to grazed/ not grazed, we include age as random factor
    summary(modLMM1)
    shapiro.test(resid(modLMM1))
    plot(modLMM1)
    summary(glht(modLMM1, linfct = mcp(grazing = "Tukey")))
    Anova(modLMM1) #no effect of grazing

    #1way anova - differences of clay depth between ages in the high marsh: MS #3
    mod22 <- aov(claydepth ~ age * grazing, smallhigh) #there isn't any interaction between age and grazing, so I analyse grazing separately
    mod22 <- aov(claydepth ~ age, smallhigh)
    summary(mod22)
    hist(resid(mod22))
    shapiro.test(resid(mod22))
    #plot(mod22)
    tk22 <- TukeyHSD(mod22)
    tk22

    #1way anova - differences of clay depth between ages in the low marsh: MS #3
    mod22 <- aov(claydepth ~ age, smalllow)
    summary(mod22)
    hist(resid(mod22))
    shapiro.test(resid(mod22))
    #plot(mod22)
    tk22 <- TukeyHSD(mod22)
    tk22
    
    #clay is deeper in low marsh and older marshes (unless outlier age 53, unknown reason). Grazing by small herbivores in high marsh affects the clay layer, but not in the low marsh
    
    #FIGURE 3AB: clay depth per site (mean +/- standard error), high and low marsh
    datahigh <- data_sand[which(data_sand$elev == 'high'), ]
    datahigh$age <- factor(datahigh$age)
    datalow <- data_sand[which(data_sand$elev == 'low'), ]
    datalow$age <- factor(datalow$age)

    library(dplyr)
    library(plotrix) # std.error(), to calculate the standard error
    # Summary per Site x grazing x vegetation type x granulosity.
    # ungroup() so the summary tables do not carry leftover grouping.
    df.summary1 <- datahigh %>%
      group_by(Site, grazing, plant_type, granulosity) %>%
      summarise(
        sd = sd(claydepth),
        se = std.error(claydepth),
        clay = mean(claydepth)
      ) %>%
      ungroup()
    df.summary1

    df.summary2 <- datalow %>%
      group_by(Site, grazing, plant_type, granulosity) %>%
      summarise(
        sd = sd(claydepth),
        se = std.error(claydepth),
        clay = mean(claydepth)
      ) %>%
      ungroup()
    df.summary2


    # High marsh. The y axis is reversed so depth increases downwards.
    # Inside aes() columns are referenced by bare name (plant_type), not as
    # df.summary1$plant_type, so the mapping always follows the plotted data.
    p_high <- ggplot(df.summary1, aes(x = Site, y = clay)) +
      theme_classic() +
      scale_y_reverse(limits = c(25, 0)) +
      geom_errorbar(aes(ymin = clay - se, ymax = clay + se, color = grazing), lwd = 0.5, position = position_dodge(0.3), width = 0.2) +
      geom_point(aes(col = grazing, shape = plant_type), position = position_dodge(0.3), size = 3) +
      scale_shape_manual(name = "Veg. type", values = c(19, 15, 18)) +
      #scale_fill_manual(name= "Management", values = c("cyan3","black", "darkgrey"))+
      scale_color_manual(name = "Management", values = c("cyan3", "black", "darkgrey")) +
      labs(x = "Saltmarsh age (years)", y = "Fine-grained layer depth (cm)") +
      scale_x_discrete(position = "top")
    p_high
    #ylim(0, 25)

    # Low marsh: same layout, with four shape values instead of three.
    p_low <- ggplot(df.summary2, aes(x = Site, y = clay)) +
      theme_classic() +
      scale_y_reverse(limits = c(25, 0)) +
      geom_errorbar(aes(ymin = clay - se, ymax = clay + se, color = grazing), lwd = 0.5, position = position_dodge(0.3), width = 0.2) +
      geom_point(aes(col = grazing, shape = plant_type), position = position_dodge(0.3), size = 3) +
      scale_shape_manual(name = "Veg. type", values = c(19, 15, 18, 17)) +
      #scale_fill_manual(name= "Management", values = c("cyan3","black", "darkgrey"))+
      scale_color_manual(name = "Management", values = c("cyan3", "black", "darkgrey")) +
      labs(x = "Saltmarsh age (years)", y = "Fine-grained layer depth (cm)") +
      scale_x_discrete(position = "top")
    p_low
    
###
#   B.2. changes in soil elevation inside and outside exclosures
###  
    elev <- read.csv("elev_r.csv")
    #for large grazers we apply a t-test because the elevation around the exclosures were stable
    #for the small grazers, the elevation around the exclosures varied due to the shape of the dune, therefore we analyse the points taken in pairs (in and out but at the same location), to avoid the effect of the place itself

    library(dplyr)
    # Per-treatment mean and sd of every column. The function is passed directly:
    # the funs() wrapper is deprecated in dplyr.
    meanvar <- elev %>% group_by(treatment) %>% summarise_all(mean)
    #write.csv(meanvar, "meanselev.csv")
    sdvar <- elev %>% group_by(treatment) %>% summarise_all(sd)
    #write.csv(sdvar, "sdelev.csv")

    # Install once by hand if missing; a script should not reinstall on every run.
    #install.packages("PairedData")
    library(PairedData)

    # Compare elevation inside vs outside one exclosure.
    #
    # block        rows of `elev` belonging to this comparison; it must contain
    #              exactly two treatment codes
    # inside_code  treatment code of the points inside the exclosure
    # outside_code treatment code of the points outside the exclosure
    # t_test       if TRUE, also print an unpaired t.test() on `block`
    # all_points   full elevation table, used to pick the values for the plot
    #
    # Draws the paired profile plot, prints the paired Wilcoxon signed-rank
    # test (plus the t-test when requested) and returns the Wilcoxon result
    # invisibly.
    test_exclosure <- function(block, inside_code, outside_code,
                               t_test = FALSE, all_points = elev) {
      # Profile plot: values are selected by treatment code from the full table
      inside <- all_points[["elev"]][which(all_points[["treatment"]] == inside_code)]
      outside <- all_points[["elev"]][which(all_points[["treatment"]] == outside_code)]
      print(plot(paired(inside, outside), type = "profile") + theme_bw())

      # Paired Wilcoxon test. The two samples are passed explicitly (split by
      # treatment, first factor level as x) because the formula interface no
      # longer accepts `paired = TRUE` in R >= 4.4.
      groups <- split(block[["elev"]], factor(block[["treatment"]]))
      stopifnot(length(groups) == 2L)
      res <- wilcox.test(groups[[1]], groups[[2]], paired = TRUE)
      print(res)

      if (t_test) {
        print(t.test(elev ~ treatment, data = block))
      }
      invisible(res)
    }

    ####SMALL GRAZERS PAIRED WILCOX TEST
    # Naming: site number + H/L (high/low marsh); treatment codes as in the
    # `treatment` column (e.g. "1HI"/"1HO").

    h1 <- elev[1:16, ]     #result: not significant
    res <- test_exclosure(h1, "1HI", "1HO")

    l1 <- elev[17:32, ]    #result: p=0.03, significantly lower outside
    res <- test_exclosure(l1, "1LI", "1LO")

    h2 <- elev[33:48, ]    #result: not significant
    res <- test_exclosure(h2, "2HI", "2HO")

    l2 <- elev[49:68, ]    #result: not significant
    res <- test_exclosure(l2, "2LI", "2LO")

    h3 <- elev[69:86, ]    #result: not significant
    res <- test_exclosure(h3, "3HI", "3HO")

    l3 <- elev[87:102, ]   #result: not significant
    res <- test_exclosure(l3, "3LI", "3LO")

    h4 <- elev[103:118, ]  #result: not significant
    res <- test_exclosure(h4, "4HI", "4HO")

    l4 <- elev[119:134, ]  #result: not significant
    res <- test_exclosure(l4, "4LI", "4LO")

    h5 <- elev[135:150, ]  #result: not significant
    res <- test_exclosure(h5, "5HI", "5HO")

    l5 <- elev[151:174, ]  #result: not significant
    res <- test_exclosure(l5, "5LI", "5LO")

    ### LARGE GRAZERS T-TEST
    # The Wilcoxon test is still printed; the t-test is the one reported.

    him6 <- elev[175:180, ] #wilcox: not significant, too low replicates; t-test: SIGNIFICANT
    res <- test_exclosure(him6, "6HIM", "6HOMO", t_test = TRUE)

    hin6 <- elev[181:200, ] #wilcox: significant; t-test: SIGNIFICANT
    res <- test_exclosure(hin6, "6HIN", "6HOM", t_test = TRUE)

    # NOTE(review): rows 196:205 overlap the previous block (181:200); presumably
    # the "6HOM" outside points are shared by both comparisons - confirm.
    hon6 <- elev[196:205, ] #wilcox: 0.06, too low replicates; t-test: SIGNIFICANT
    res <- test_exclosure(hon6, "6HON", "6HOM", t_test = TRUE)

    lim6 <- elev[206:213, ] #wilcox: not significant, too low replicates; t-test: NOT SIGNIFICANT
    res <- test_exclosure(lim6, "6LIM", "6LOMO", t_test = TRUE)

    lin6 <- elev[214:237, ] #wilcox: not significant; t-test: SIGNIFICANT
    res <- test_exclosure(lin6, "6LIN", "6LOM", t_test = TRUE)

    lon6 <- elev[238:247, ] #wilcox: 0.06, too low replicates; t-test: SIGNIFICANT
    res <- test_exclosure(lon6, "6LON", "6LONO", t_test = TRUE)
        
        
#############
#   C. Analysis of the erosion of the fine-grained layer (excluding samples with sand layers)
#############

  # Table with erosion, sediment and vegetation variables
  data <- read.csv("datavariablesclay38.csv")
  str(data)
  data$age <- as.factor(data$age)
  # The calculated possible volume loss over time (d_max_lost) correlates with
  # the real maximum volume lost (et38onlyclay):
  plot(d_max_lost ~ et38onlyclay, data = data)
  # e_rate is the time when half of the total volume loss is lost
  plot(e_rate ~ d_max_lost, data = data)
  # Within the first 2 hours, almost all the samples had lost half of the
  # possible maximum weight lost.
  boxplot(e_rate ~ Treatment, data = data, las = 2)


  # Differences in clay erosion among treatments are tested separately for
  # small herbivores (rows 1-35) and large herbivores (rows 36-53).
  hares <- data[1:35, ]
  cows <- data[36:53, ]
  
  
###
#   C.1. Test significant differences on clay erosion among treatments - LARGE GRAZERS
###
  # MS#4
  # Two-way ANOVA (grazing x elevation) on log(d_max_lost + 0.01); the log
  # transform is used so that the residuals can be assumed normal.
  # Author's note: treatment is significant, but no effect of elevation nor
  # interaction, so we simplify the model.
  mod1 <- aov(lm(log(d_max_lost + 0.01) ~ grazing * elev, cows))
  # Check normality of residuals: the four diagnostic plots on a 2 x 2 grid
  par(mfrow = c(2, 2))
  plot(mod1)
  par(mfrow = c(1, 1))
  hist(resid(mod1))
  shapiro.test(resid(mod1)) # normal
  summary(mod1)
  # Tukey post hoc: all significantly different unless the mowed and juncus patches
  tk1 <- TukeyHSD(mod1)
  tk1
  #                                   diff        lwr       upr     p adj
  # Grazed-Artificial mowed     -2.686183 -3.6240725 -1.748293 0.0000166
  # Not Grazed-Artificial mowed  1.112080  0.1741897  2.049969 0.0206838
  # Not Grazed-Grazed            3.798262  2.8603724  4.736152 0.0000004
  
###
#   C.2. Test significant differences on clay erosion among ages and grazing by small herbivores
###  
  # MS#5: linear mixed model of d_max_lost against grazing (grazed / not
  # grazed), with Site as the random intercept.
  modLMM1 <- lmer(d_max_lost ~ grazing + (1 | Site), data = hares)
  summary(modLMM1)
  shapiro.test(resid(modLMM1))
  plot(modLMM1)
  summary(glht(modLMM1, linfct = mcp(grazing = "Tukey")))
  Anova(modLMM1) # effect of grazing, not of Site

  # MS 6: one-way ANOVA to test the effect of Site
    # First with the interaction; the author found no Site x grazing
    # interaction, so the model is simplified to a one-way ANOVA on Site.
    mod2 <- aov(lm(d_max_lost ~ Site * grazing, hares))
    mod2 <- aov(lm(d_max_lost ~ Site, hares))
    # Check normality of residuals (2 x 2 grid of diagnostic plots)
    par(mfrow = c(2, 2))
    plot(mod2)
    par(mfrow = c(1, 1))
    hist(resid(mod2))
    shapiro.test(resid(mod2)) # normal
    summary(mod2)
    summary.lm(mod2)
    # Tukey post hoc among sites
    tk2 <- TukeyHSD(mod2)
    tk2
   
    #figure 3C: maximum volume loss per site (mean +/- standard error)
    library(dplyr)
    # Treat2 has the samples from each location not separated by elevation,
    # because the anova was not significant.
    # ungroup() so the summary table does not carry leftover grouping.
    df.summary1 <- data %>%
      group_by(Treat2, grazing, plant_type, granulosity, Site) %>%
      summarise(
        sd = sd(d_max_lost),
        se = std.error(d_max_lost),
        clay = mean(d_max_lost)
      ) %>%
      ungroup()
    df.summary1

    # Inside aes() columns are referenced by bare name (plant_type), not as
    # df.summary1$plant_type, so the mapping always follows the plotted data.
    p_ero <- ggplot(df.summary1, aes(x = Site, y = clay)) +
      theme_classic() +
      geom_errorbar(aes(ymin = clay - se, ymax = clay + se, color = grazing), lwd = 0.5, position = position_dodge(0.3), width = 0.2) +
      geom_point(aes(col = grazing, shape = plant_type), position = position_dodge(0.3), size = 3) +
      scale_shape_manual(name = "Veg. type", values = c(19, 15, 18, 17)) +
      #scale_fill_manual(name= "Granulosity", values = c("white", "Gold", "lightgreen", "cyan2"))+
      scale_color_manual(name = "Management", values = c("cyan3", "black", "darkgrey")) +
      labs(x = "Saltmarsh age (years)", y = "Maximum volume loss (%)") +
      ylim(0, 7.5)
    p_ero
    

#############
#   D. Which variables are correlated to the clay layer erosion?
#############  
  
  ### Analysis small and large grazers per separate:
    #check the relations among the variables and relation with erosion (correlations and PCA)
    #From representative variables, Anovas and LMM are performed to find differences between:
        #treatments
        #vegetation type
        #soil texture
  
###
#   D.1. Analysis clay erosion only large herbivores (samples from high and low marsh)
###
  
  #"possible maximum volume loss over time" (d_max_lost) was utilised as the erosion variable, calculated in the first part of the script

  # Columns 9-19 and 23 = numeric variables plus the erosion variable;
  # row 6 is dropped (outlier 6hin3).
  datacows <- cows[-6, c(9:19, 23)]

  #check correlations between variables
  chart.Correlation(log(datacows), histogram = TRUE, pch = 19) #log transform data for linear correlations
  chart.Correlation(datacows, histogram = TRUE) #without log transform

      # Pearson correlations on the log-transformed variables (the log
      # transform is what accounts for the non-linearity)
      Mcorre3 <- as.dist(cor(log(datacows), method = "pearson"))
      round(Mcorre3, 2)
      library(Hmisc)
      # Significance matrix. The correlation type belongs to rcorr() (argument
      # `type`), not to as.matrix(), where it was silently ignored.
      out03 <- rcorr(as.matrix(log(datacows)), type = "pearson")
      round(as.dist(out03$P), 4)
       
  
    #PCA
      # Numeric variables only (columns 9-19, no erosion variable), rows 6 and 8 removed
      pcacows <- cows[-c(6, 8), 9:19]
      # Overview of the variables: raw, standardized, and log + standardized
      boxplot(pcacows, las = 2)
      boxplot(scale(pcacows), las = 2)
      trancows <- log(pcacows)
      boxplot(scale(trancows), las = 2)


      library(ggfortify)
      # Same rows as the PCA input, with all columns, to style the points
      cows2 <- cows[-c(6, 8), ]
      df <- trancows
      # PCA on the log-transformed variables, scaled to unit variance
      pca_res <- prcomp(df, scale. = TRUE)

      # Biplot 1: point size follows d_max_lost, loadings drawn in grey
      autoplot(
        pca_res,
        data = cows2,
        size = "d_max_lost",
        alpha = 0.5,
        loadings = TRUE,
        loadings.colour = "grey",
        loadings.label = TRUE,
        loadings.label.colour = "black",
        loadings.label.size = 2.5
      ) +
        theme_classic() +
        #scale_color_manual(name= "Management", values = c("cyan3", "darkgrey","black"))+
        scale_size_continuous(range = c(1, 10))
        #scale_shape_manual(name= "Veg. type", values = c(19, 15, 18))

      # Biplot 2: colour by Treat2, shape by plant_type, loading arrows hidden
      # (transparent) so only their labels show
      autoplot(
        pca_res,
        data = cows2,
        colour = "Treat2",
        size = "d_max_lost",
        alpha = 0.7,
        shape = "plant_type",
        loadings = TRUE,
        loadings.colour = "transparent",
        loadings.label = TRUE,
        loadings.label.colour = "black",
        loadings.label.size = 3,
        loadings.label.vjust = -0.5
      ) +
        theme_classic() +
        scale_color_manual(name = "Management", values = c("cyan3", "darkgrey", "black")) +
        scale_size_continuous(range = c(1, 10)) +
        scale_shape_manual(name = "Veg. type", values = c(19, 15, 18))
      
    
      # values correlations PCA:
      #install.packages("vegan")
      # library(vegan)
      # pca.out <- rda(trancows, scale=T)
      # 
      # n<-dim(trancows) [1]
      # n
      # p<-dim(trancows)[2]
      # p
      # 
      # #"display = 'sites'" corresonds to the rows of data; one datapoint per row
      # #"display = 'species'" corresponds to the columns of data; one line/arrow per column
      # #weights (loadings) of the original variables on the first three principal components
      # summary(pca.out)$species[,1:3]
      # #correlation between the original variables and the 3 first principal components
      # n_dat<-cbind(summary(pca.out)$sites[,1:3],trancows)
      # m_cor<-as.matrix(cor(n_dat))
      # m_cor[1:3,4:(p+3)]
      # #significance of the correlations
      # library(Hmisc)
      # s_cor<-rcorr(as.matrix(n_dat))
      # round(s_cor$P[1:3,4:(p+3)],4)
      
      
      # Regressions soil and vegetation variables against erosion

      ##fit log-log models to get R-squared values
      fit <- lm(log(d_max_lost) ~ log(Bulk), cows2) ##do this model with all the variables by hand to obtain the r-squared
      hist(resid(fit))
      shapiro.test(resid(fit))
      summary(fit)
      plot(log(d_max_lost) ~ log(TotalBiom), cows2)

      # Same power-law fit written with generic x / y so that the fitted curve
      # can be drawn on the original (untransformed) scale
      x <- (cows2$Bulk)
      y <- (cows2$d_max_lost)

      model1 <- lm(log(y) ~ log(x))
      hist(resid(model1))
      shapiro.test(resid(model1))
      #plot(model1)
      plot(y ~ x, pch = 16, ylab = "Total volume loss after 1h (%)")
      summary(model1)
      anova(model1)
      # Predict from min to max of the observed x values. The grid follows the
      # data instead of a fixed 0-80 range, which included x = 0 (log(0) = -Inf)
      # and did not fit predictors on another scale.
      xpred <- seq(min(x, na.rm = TRUE), max(x, na.rm = TRUE), length.out = 500)
      # predict() applies log(x) from the model formula; exp() back-transforms
      # the predicted log(y)
      ypred <- exp(predict(model1, list(x = xpred), type = "response"))
      lines(ypred ~ xpred, lty = 3, col = "grey", lwd = 3)

      #results regressions power law (log(y)~log(x))
      #soil:
      #bulk: 0.77***
      #pen resist: 0.59***
      #Silt:0.50***
      #OC: 0.08ns
      #SWC=0.03ns
      #vegetation:
      #TotalBiom: 0.89***
      #RDtot=0.87***
      #RD.F= 0.83***
      #RD.C= 0.68***
      #RhizD = 0.62***
      #root diam: 0.28* p=0.01
      
      ## One figure with every predictor variable

      library(tidyverse)      # for dplyr, ggplot2 etc
      # Predictor columns, in the order the facets should appear
      pred_vars <- c("Bulk", "SWC", "OC", "Silt", "PenResist", "TotalBiom",
                     "RDtot", "RD.C", "RD.F", "RhizD", "RootDiam")
      # Long format: one row per sample and predictor (name in `varname`,
      # value in `predvar`); rows with a missing value are dropped
      cowss <- pivot_longer(
        cows2,
        cols = all_of(pred_vars),
        names_to = "varname",
        values_to = "predvar",
        values_drop_na = TRUE
      )

      # Fix the facet order
      cowss$varname <- factor(cowss$varname, levels = pred_vars)

      # plot with all the variables: power-law curves (FIG. 4)
      ggplot(cowss, aes(x = predvar, y = d_max_lost)) +
        #geom_point(size=2,alpha=0.6, aes(col=grazing) ) + #geom_point(size=3,aes(col=Treatment, alpha=0.6) ) +
        labs(x = "predictor variable", y = "Max. volume loss (%)") +
        geom_point(
          aes(colour = Treat2, fill = granulosity, shape = plant_type),
          size = 2,
          stroke = 2
        ) +
        scale_shape_manual(name = "Veg. type", values = c(21, 22, 23, 24)) +
        scale_fill_manual(name = "Granulosity", values = c("white", "lightgreen", "cyan2")) +
        scale_color_manual(name = "Management", values = c("cyan3", "darkgrey", "black")) +
        #scale_color_manual(values = c( 'yellowgreen',  'deepskyblue2', 'yellow2'))+
        theme_classic(base_size = 14) +
        #annotate("segment", x=-Inf, xend=Inf, y=-Inf, yend=-Inf)+ #to add the line in the Y axis
        #annotate("segment", x=-Inf, xend=-Inf, y=-Inf, yend=Inf)+ #to add the line in the Y axis
        # Both axes are log-transformed, the straight line is fitted on that
        # scale, and coord_trans() applies 'exp' to both axes afterwards
        scale_x_continuous(trans = 'log', breaks = scales::pretty_breaks(n = 4)) +
        scale_y_continuous(trans = 'log', breaks = scales::pretty_breaks(n = 4), limits = c(-1, 7)) +
        stat_smooth(method = 'lm', se = FALSE, colour = "black", linetype = "dotted") +
        coord_trans(y = 'exp', x = 'exp') +
        #stat_smooth(method = 'glm', formula = 'y~log(x)', se=FALSE, family = gaussian(link = 'log'), col="black", lty="dotted") +
        #geom_smooth(method = "lm", se=FALSE, formula= log(y) ~log(x), col="black", lty="dotted") +
        #geom_smooth(method = "glm", se=FALSE, method.args = list(family = "Gamma"), col="black", lty="dotted") +
        facet_wrap(~varname, scales = "free") +
        #stat_cor(method="glm", method.args = list(family = "Gamma"))+
        #facet_wrap(~varname,scale="free_x")+
        theme(strip.background = element_rect(colour = "white", fill = "white")) #to remove the box from the titles
      
      
      
  ##2-way ANOVAs to test differences between treatmens of the variables most related with erosion: 
      #sediment property most correlated with erosion is: 
        #Bulk density
      #Vegetation properties
        #total root density (the highest R2 was from total belowground biomass but we select total root density because is the compartment that best explains the total belowground biomass)
        
  # Bulk density ~ treatment # MS7
  # Two-way ANOVA of log-transformed bulk density against grazing, elevation
  # and their interaction, fitted on the `cows` data set.
  # Author's note: compaction differs between treatments and interacts with elevation.
  # NOTE(review): the original heading also mentioned plant type & granulosity,
  # which are not terms of this model - confirm.
      mod0 <- aov(formula = log(Bulk) ~ grazing * elev, data = cows)
      # residual diagnostics on a 2 x 2 panel, then back to a single panel
      par(mfrow = c(2, 2))
      plot(mod0)
      par(mfrow = c(1, 1))
      # ANOVA table and the underlying linear-model coefficients
      summary(mod0)
      summary.lm(mod0)
      # residual distribution (author's note: normal)
      hist(resid(mod0))
      shapiro.test(resid(mod0))
      # Tukey post-hoc comparisons
      tk0 <- TukeyHSD(mod0)
      tk0
      
  # Root density # MS6
  # Author's note: root density is also correlated with bulk density (rs=-0.9),
  # so everything is mediated by the compactness of the soil.
  # Two-way ANOVA of log-transformed root density against grazing, elevation
  # and their interaction, fitted on the `cows` data set.
      mod0 <- aov(formula = log(densityroots) ~ grazing * elev, data = cows)
      # residual diagnostics on a 2 x 2 panel, then back to a single panel
      par(mfrow = c(2, 2))
      plot(mod0)
      par(mfrow = c(1, 1))
      # ANOVA table and the underlying linear-model coefficients
      summary(mod0)
      summary.lm(mod0)
      shapiro.test(resid(mod0)) # author's note: normal
      # Tukey post-hoc comparisons
      tk0 <- TukeyHSD(mod0)
      tk0
      
###
#   D.2. Analysis of clay erosion with small herbivores (samples from low marsh and location 63yr high marsh because the clay layer didn't collapse)
###
  
  # "possible maximum volume loss over time" (d_max_lost) is used as the erosion variable; it is calculated in the first part of the script
  # Keep rows 1-26 and 28-29 of `data` (row 27 is excluded as an outlier).
  # NOTE(review): the original comment named the outlier 4LO1 while the table of
  # contents names 4LO3 - confirm which sample row 27 actually is.
  vhares <- data[c(1:26, 28:29), ]
  boxplot(et38onlyclay ~ Treatment, data = vhares)

  # Check correlations between variables.
  # Columns 9-19 and 23: numeric variables & erosion variable (per the original comment).
  datahares <- vhares[, c(9:19, 23)]
  chart.Correlation(log(datahares), histogram = TRUE) # log transform data for linear correlations

      # Pearson correlation coefficients on the log-transformed data
      Mcorre4 <- as.dist(cor(log(datahares), method = "pearson"))
      round(Mcorre4, 2)
      # p-values of the same correlations. rcorr() chooses the coefficient through
      # `type`; the previous call passed `method=` inside as.matrix(), where it
      # was silently ignored.
      out04 <- rcorr(as.matrix(log(datahares)), type = "pearson")
      round(as.dist(out04$P), 4)
    
  # PCA of the predictor variables (small-herbivore data)
      # columns 9-19 of vhares: numeric variables (per the original comment)
      pcahares <- vhares[, 9:19]
      # overview of the raw and of the standardised variables
      boxplot(pcahares, las = 2)
      boxplot(scale(pcahares), las = 2)
      # NOTE(review): the original comment on this line read "no need to transform",
      # yet the variables are log-transformed before the PCA - confirm the intent.
      tranhares <- log(pcahares)
      boxplot(scale(tranhares), las = 2)

      # autoplot() method for prcomp objects
      library(ggfortify)
      df <- tranhares
      pca_res <- prcomp(df, scale. = TRUE)

      # Biplot 1: point size mapped to erosion (d_max_lost), grey loading arrows
      autoplot(
        pca_res,
        data = vhares,
        size = "d_max_lost",
        alpha = 0.5,
        loadings = TRUE,
        loadings.colour = "grey",
        loadings.label = TRUE,
        loadings.label.colour = "black",
        loadings.label.size = 2.5
      ) +
        theme_classic() +
        #scale_color_manual(name= "Management", values = c("cyan3", "darkgrey","black"))+
        scale_size_continuous(range = c(1, 10))
      #scale_shape_manual(name= "Veg. type", values = c(19, 15, 18))

      # Biplot 2: colour = grazing, shape = plant type, size = erosion;
      # loading arrows are transparent so only their labels show
      autoplot(
        pca_res,
        data = vhares,
        colour = "grazing",
        size = "d_max_lost",
        alpha = 0.7,
        loadings = TRUE,
        loadings.colour = "transparent",
        loadings.label = TRUE,
        loadings.label.colour = "black",
        loadings.label.size = 3,
        shape = "plant_type"
      ) +
        theme_classic() +
        scale_color_manual(name = "Management", values = c("black", "darkgrey")) +
        scale_size_continuous(range = c(1, 10)) +
        scale_shape_manual(name = "Veg. type", values = c(19, 17))
      
      
            # # values correlations PCA:
            #     #install.packages("vegan")
            # library(vegan)
            # pca.out <- rda(tranhares, scale=T)
            # 
            # n<-dim(tranhares) [1]
            # n
            # p<-dim(tranhares)[2]
            # p
            # 
            # #percentage of variability explained by each principal component 
            # ev<-pca.out$CA$eig
            # varExp <- ev/sum(ev)*100
            # varExp
            # 
            # #Kaiser-Guttman criterion
            # pca.out$CA$eig #the eigenvalue of each principal component that is used should be higher than 1
            # 
            # #"display = 'sites'" corresponds to the rows of data; one datapoint per row
            # #"display = 'species'" corresponds to the columns of data; one line/arrow per column
            # #weights (loadings) of the original variables on the first three principal components
            # summary(pca.out)$species[,1:3]
            # #correlation between the original variables and the first 3 principal components
            # n_dat<-cbind(summary(pca.out)$sites[,1:3],tranhares)
            # m_cor<-as.matrix(cor(n_dat))
            # m_cor[1:3,4:(p+3)]
            # #check whether these correlations are significant
            # library(Hmisc)
            # s_cor<-rcorr(as.matrix(n_dat))
            # round(s_cor$P[1:3,4:(p+3)],4)
            # 
      
       ## Correlations between each variable and erosion:

       ## Fit log-log (power-law) models.
       # Template model: replace the predictor (here TotalBiom) by hand with each
       # variable in turn to obtain its r-squared.
       # The previous formula, `d_max_lost ~ log(y)~log(x)`, contained two tildes and
       # referred to x/y before they were defined, so it could not be fitted.
       fit <- lm(log(d_max_lost) ~ log(TotalBiom), data = vhares)
       # residual checks and model summary (r-squared, p-value)
       hist(resid(fit))
       shapiro.test(resid(fit))
       summary(fit)
     
       
       # Power-law fit of erosion (y) against one predictor (x), with the
       # back-transformed curve drawn over the raw data.
       x <- vhares$TotalBiom
       y <- vhares$d_max_lost


       # linear model on the log-log scale
       model1 <- lm(log(y) ~ log(x))
       hist(resid(model1))
       shapiro.test(resid(model1))
       plot(y ~ x, pch = 16, ylab = "Total volume loss after 1h (%)")
       summary(model1)
       # Predict over the observed range of x. The previous grid, seq(0, 80, 0.001),
       # was hard-coded for one variable and started at 0, where log(x) is -Inf.
       xpred <- seq(min(x, na.rm = TRUE), max(x, na.rm = TRUE), length.out = 500)
       # predict() applies log() to xpred through the model formula; exp() brings the
       # prediction back to the original scale of y
       ypred <- exp(predict(model1, list(x = xpred), type = "response"))
       lines(ypred ~ xpred, lty = 3)
       
       
      
       #results power-law regressions log(y)~log(x) 
       #soil:
         #bulk: 0.47*** 
         #pen resist: 0.37 *** 
         #Silt:0.0ns 
         #OC: 0.01ns 
         #SWC=0.16* p = 0.01 
       #vegetation:
         #TotalBiom: 0.42*** p = 0.0001
         #RDtot=0.41***
         #RD.F= 0.43***
         #RD.C= 0.25** p=0.003
         #RhizD = 0.03ns
         #root diam: 0.28** p=0.001
       
       ## Plot all correlations

       # Reshape to long format: one row per sample x predictor, with the predictor
       # name in `varname` and its value in `predvar` (NA values dropped). `varname`
       # is then turned into a factor whose level order fixes the facet order.
       varhares <- vhares %>%
         pivot_longer(
           cols = c(
             "Bulk", "SWC", "OC", "Silt", "RDtot", "RD.C", "RD.F",
             "RhizD", "TotalBiom", "RootDiam", "PenResist"
           ),
           names_to = "varname",
           values_to = "predvar",
           values_drop_na = TRUE
         ) %>%
         mutate(
           varname = factor(
             varname,
             levels = c(
               "Bulk", "SWC", "OC", "Silt", "PenResist", "TotalBiom",
               "RDtot", "RD.C", "RD.F", "RhizD", "RootDiam"
             )
           )
         )
      
      # plot with all the variables: power-law curves (FIG. 5)
      # One facet per predictor (varname), erosion (d_max_lost) on the y axis.
      # Both axes use a log-transformed scale and stat_smooth() fits a straight line
      # with lm; coord_trans(exp) is then applied to the coordinate system.
      # NOTE(review): presumably this is what draws the log-log fit as a power-law
      # curve on the original axes - confirm against the rendered figure.
      varhares %>% ggplot(aes(x=predvar,y=d_max_lost)) + 
        #geom_point(size=2,alpha=0.6, aes(col=plant_type) ) + #geom_point(size=3,aes(col=Treatment, alpha=0.6) ) +
        labs(x="predictor variable", y="Max. volume loss (%)") +
        # outline colour = grazing, fill (given as `bg`) = granulosity, shape = plant type
        geom_point(aes(col=grazing,bg=granulosity, shape=plant_type),size=2, stroke=2) +
        # shapes 21 and 24 are fillable point shapes, so the fill scale below is visible
        scale_shape_manual(name= "Veg. type", values = c(21,24))+
        scale_fill_manual(name= "Granulosity", values = c("white", "Gold", "lightgreen", "cyan2"))+
        scale_color_manual(name= "Management", values = c("black", "darkgrey"))+
        #scale_color_manual(values = c( 'yellowgreen',  'deepskyblue2', 'yellow2'))+
        theme_classic(base_size=14)+
        scale_x_continuous(trans = 'log', breaks = scales::pretty_breaks(n = 4)) +
        # NOTE(review): limits c(-1,7) are combined with trans = 'log'; -1 has no
        # logarithm - confirm that these limits behave as intended
        scale_y_continuous(trans = 'log', breaks = scales::pretty_breaks(n = 4),limits = c(-1,7)) +
        stat_smooth(method = 'lm', se = FALSE, col="black", lty="dotted") +
        coord_trans(y = 'exp', x = 'exp')+
        #annotate("segment", x=-Inf, xend=Inf, y=-Inf, yend=-Inf)+ #to add the line in the Y axis
        #annotate("segment", x=-Inf, xend=-Inf, y=-Inf, yend=Inf)+ #to add the line in the Y axis
        #geom_smooth(method = "lm", se=FALSE, formula=y~x+I(x^2), col="black", lty="dotted") +
        # NOTE(review): `scale=` presumably relies on partial matching of facet_wrap's `scales` argument - confirm
        facet_wrap(~varname,scale="free")+
        #facet_wrap(~varname,scale="free_x")+
        theme(strip.background = element_rect(colour="white", fill="white")) #to remove the box from the titles
      
      
  ## Models to test differences between treatments in the variables related to erosion:
    # Author's note: the sediment property most correlated to erosion is bulk density (correlated to resistance to penetration at 5cm - pensresist5) and granulosity

    # Plant type and erosion
    # Linear mixed model: sqrt-transformed erosion with plant_type as fixed effect
    # and Site as random intercept.
    modLMM1 <- lmer(sqrt(d_max_lost) ~ plant_type + (1 | Site), data = vhares)
    summary(modLMM1)
    # residual checks
    shapiro.test(resid(modLMM1))
    plot(modLMM1)
    # Tukey contrasts between the levels of plant_type
    summary(glht(modLMM1, linfct = mcp(plant_type = "Tukey")))
    # Removed: summary(glht(modLMM1, lsm(pairwise ~ grazing|age)))
    # That call belongs to a model with grazing and age as fixed factors; neither is
    # a term of modLMM1, so it cannot be evaluated for this model.
    # Wald test of the fixed effect (plant_type)
    Anova(modLMM1)
    
    # Granulosity and erosion
    # Linear mixed model: sqrt-transformed erosion with granulosity as fixed effect
    # and Site as random intercept.
    modLMM1 <- lmer(sqrt(d_max_lost) ~ granulosity + (1 | Site), data = vhares)
    summary(modLMM1)
    # residual checks
    shapiro.test(resid(modLMM1))
    plot(modLMM1)
    # Tukey contrasts between the levels of granulosity
    summary(glht(modLMM1, linfct = mcp(granulosity = "Tukey")))
    # Removed: summary(glht(modLMM1, lsm(pairwise ~ grazing|age)))
    # That call belongs to a model with grazing and age as fixed factors; neither is
    # a term of modLMM1, so it cannot be evaluated for this model.
    # Wald test of the fixed effect (granulosity)
    Anova(modLMM1)
    
    # Bulk density
    # Two-way ANOVA of bulk density against grazing, Site and their interaction,
    # fitted on the `hares` data set.
    # NOTE(review): this section builds `vhares` (outlier removed) but the ANOVAs
    # use `hares` - confirm that this is the intended data set.
    mod0 <- aov(formula = Bulk ~ grazing * Site, data = hares)
    # Author's note: differences between grazing/no grazing only in stage 38, 63high
    # and 128 (so also plant type), sites (ages) and interaction. So bulk density
    # decreases with age and with late succession plants.
    summary(mod0)
    # residual diagnostics on a 2 x 2 panel, then back to a single panel
    par(mfrow = c(2, 2))
    plot(mod0)
    par(mfrow = c(1, 1))
    summary.lm(mod0)
    hist(resid(mod0))
    shapiro.test(resid(mod0)) # author's note: normal
    # Tukey post-hoc comparisons
    tk0 <- TukeyHSD(mod0)
    tk0
  
    
    # One-way ANOVA of bulk density against plant type (`hares` data set).
    # Author's note: bulk density differs between plant types (related to the grazing before).
    mod0 <- aov(formula = Bulk ~ plant_type, data = hares)
    summary(mod0)
    summary.lm(mod0)
    # residual checks
    hist(resid(mod0))
    shapiro.test(resid(mod0)) # author's note: normal
    # Tukey post-hoc comparisons
    tk0 <- TukeyHSD(mod0)
    tk0
    
    # One-way ANOVA of bulk density against granulosity (`hares` data set).
    # Author's note: bulk density is higher in massive than blocky and granular,
    # and no differences between blocky and granular.
    mod0 <- aov(formula = Bulk ~ granulosity, data = hares)
    summary(mod0)
    summary.lm(mod0)
    # residual checks
    hist(resid(mod0))
    shapiro.test(resid(mod0)) # author's note: normal
    # Tukey post-hoc comparisons
    tk0 <- TukeyHSD(mod0)
    tk0
    
    # One-way ANOVA of bulk density against age (`hares` data set).
    # NOTE(review): the original trailing comment on this model was a copy of the
    # granulosity result ("higher in massive than blocky and granular") - the
    # actual result for age still needs to be written down.
    mod0 <- aov(formula = Bulk ~ age, data = hares)
    summary(mod0)
    summary.lm(mod0)
    # residual checks
    hist(resid(mod0))
    shapiro.test(resid(mod0)) # author's note: normal
    # Tukey post-hoc comparisons
    tk0 <- TukeyHSD(mod0)
    tk0
    
        
    
    # Root density

    # One-way ANOVA of log-transformed root density against plant type (`hares` data set).
    # Author's note: differences between plant types, no differences between sites.
    mod0 <- aov(formula = log(densityroots) ~ plant_type, data = hares)
    summary(mod0)
    summary.lm(mod0)
    #plot(mod0)
    shapiro.test(resid(mod0)) # author's note: normal
    # Tukey post-hoc comparisons
    tk0 <- TukeyHSD(mod0)
    tk0
    
    # One-way ANOVA of log-transformed root density against age (`hares` data set).
    # NOTE(review): the original trailing comment was copied from the plant-type
    # model ("differences between plant types, no differences between sites") -
    # confirm the result for age.
    mod0 <- aov(formula = log(densityroots) ~ age, data = hares)
    summary(mod0)
    summary.lm(mod0)
    #plot(mod0)
    shapiro.test(resid(mod0)) # author's note: normal
    # Tukey post-hoc comparisons
    tk0 <- TukeyHSD(mod0)
    tk0
  

  #The higher erosion in location 3 comes from a combination of the bulk density, plant type and the granulosity of the soil. But we don't know for sure why the soil was more granular in location 3

