
#=============================================================================================
#Name        : R code for paper Incorporation of ammonia-nitrogen into black soldier fly larvae proteins
#Author(S)   : Alejandro Parodi - ORCID 0000-0003-1351-138X
#Reviewer(S) : --
#Description : Tables, Figures and statistical analyses
#=============================================================================================
#1. Setup ------------------------------------------------------------------------------------
# Clean environment
# NOTE(review): rm(list = ls()) wipes every object in the user's workspace;
# running the script in a fresh R session is the safer alternative.
rm(list = ls())

#Libraries installation
#Only the packages that are not installed yet are requested, and
#install.packages() is skipped altogether when nothing is missing.
packages <- c("plyr", "dplyr", "tidyr", "plotrix", "ggplot2", "broom", "car",
              "networkD3", "tibble", "webshot")
missing_packages <- setdiff(packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

#Attach the packages in the order listed (plyr before dplyr, so that dplyr's
#functions take precedence where both packages export the same name)
for (package in packages) {
  library(package, character.only = TRUE)
}

#2. Data acquisition------------------------------------------------------------------

#Set working directory (folder that holds the input CSV files)
# NOTE(review): machine-specific absolute path; adapt it before running the
# script on another computer.
data_dir <- "/Users/lupuna/OneDrive - Wageningen University & Research/PhD/isotope/paper/data/directory"
setwd(data_dir)

#Set output path: sub-folder "outputs" of the data directory. The folder is
#created when it does not exist yet, so that writing tables and figures into
#it cannot fail on a missing directory.
path_out <- file.path(data_dir, "outputs")
dir.create(path_out, showWarnings = FALSE, recursive = TRUE)

#--- Larval weights -----------------------------------------------------------
biomass <- read.csv(file = "in_larvae_weight.csv")
names(biomass)

#Column description (in_larvae_weight.csv)
#[Sample] -> Type of sample (manure, starter, larvae or residues)
#[Treatment] -> Treatment (Enriched, Enriched_Digesta or Control) the row belongs to
#[Replicate] -> Replicate number (R1-R5)
#[Larvae_added] -> Number of larvae added at the start of the experiment
#[Larvae_found] -> Number of larvae found when the larvae were removed from the manure
#[Total_fresh_before_NFreediet] -> Weight in grams of the larvae when they were removed from the manure
#                  NOTE(review): the analysis code spells this column
#                  Total_fresh_before_Nfreediet - confirm against the CSV header.
#[Total_fresh] -> Weight in grams of the larvae when they were removed from the nitrogen-free diet.
#                  For the treatment Enriched_Digesta this value is equal to the previous column,
#                  given that those larvae were not put on the N-free diet.


#--- Mass balance (inputs and outputs) ---------------------------------------
total_balance <- read.csv(file = "in_balance.csv")
names(total_balance)

#Column description (in_balance.csv)
#[Type] -> Specifies whether the data shown in the row is for an input or an output
#[Sample] -> Type of sample (manure, starter, larvae or residual_material)
#[Treatment] -> Treatment (Enriched, Enriched_Digesta or Control) the row belongs to
#[Replicate] -> Replicate number (R1-R5)
#[Total_fresh] -> Weight in grams of the larvae when they were removed from the nitrogen-free diet.
#                 For the treatment Enriched_Non_NFree this value is equal to the previous column,
#                 given that those larvae were not put on the N-free diet.
#                 NOTE(review): the code below reads this column as Total_fresh_afterNfree.g., and
#                 "Enriched_Non_NFree" presumably refers to Enriched_Digesta - confirm both.
#[DM] -> DM content (%) of larvae
#[N] -> Total nitrogen content (%) of DM larvae
#[N15] -> 15N atomic percentage (%) in DM larvae
#NOTE(review): DM, N and N15 are described for larvae, but the code below uses them for every
#Sample type (manure, starter, residual material) as well.


#--- Protein extraction of the larvae -----------------------------------------
larvae_balance <- read.csv(file = "in_protein_extraction.csv")
names(larvae_balance)

#Column description (in_protein_extraction.csv)
#[Type] -> Specifies whether the data shown in the row is for an input or an output
#[Sample] -> Type of sample (insoluble nitrogen fraction, soluble dialysed nitrogen fraction)
#[Treatment] -> Treatment (Enriched, Enriched_Digesta or Control) the row belongs to
#[Replicate] -> Replicate number (R1-R5)
#[Yield] -> Percentage (%) of DM recovered after the extraction of the insoluble and soluble dialysed N fractions
#[N] -> Total nitrogen content (%) of each N DM fraction
#[N15] -> 15N atomic percentage (%) of each N fraction


#--- Ammonia-nitrogen ---------------------------------------------------------
ammonia <- read.csv(file = "in_ammonia.csv")
colnames(ammonia)

#Column description (in_ammonia.csv)
#[Type] -> Specifies whether the data shown in the row is for an input or an output
#[Sample] -> Type of sample (manure and residual material)
#[Treatment] -> Treatment (Enriched, Enriched_Digesta or Control)
#[Replicate] -> Replicate number (R1-R5)
#[nutrient] -> Only contains data for ammonia nitrogen (N-NH3)
#[value] -> Ammonia-nitrogen in 100 g DM sample


#3. Analyses Table 1------------------------------------------------------------------

#Table 1 - Comparison of larval weights among treatments

#Show the raw data
biomass

#Individual larval weight in mg: total fresh weight (g) divided by the number
#of larvae found, times 1000. Columns are accessed with `$` on purpose.
mg_per_g <- 1000
biomass$weight_beforeNfree <- (biomass$Total_fresh_before_Nfreediet / biomass$Larvae_found) * mg_per_g
biomass$weight_afterNfree <- (biomass$Total_fresh / biomass$Larvae_found) * mg_per_g

#Keep the first three columns plus the two weights just added (columns 8-9)
#and stack the two weights into long format (weight = label, value = mg)
biomass <- biomass %>%
  select(1, 2, 3, 8, 9) %>%
  gather("weight", "value", 4:5)

#Clean dataset to avoid NA: the Enriched_Digesta / after-N-free combination is
#dropped. conca is the pasted Treatment + weight label and is reused as the
#grouping factor of the post hoc test.
biomass$conca <- paste0(biomass$Treatment, biomass$weight)
biomass <- subset(biomass, conca != "Enriched_Digestaweight_afterNfree")

#Edit labels and fix their order (before the N-free diet first)
weight_label <- ifelse(biomass$weight == "weight_beforeNfree", "Before_Nfree", "After_Nfree")
biomass$weight <- factor(weight_label, levels = c("Before_Nfree", "After_Nfree"))

#Mean and standard error per Treatment x weight
biomass_avg <- ddply(biomass, .(Treatment, weight), summarize,
                     mean = mean(value, na.rm = TRUE),
                     se = std.error(value, na.rm = TRUE))

#Text cell "mean ± se" (mean without decimals, se with two decimals)
biomass_avg$value <- paste0(round(biomass_avg$mean, digits = 0),
                            " ± ",
                            round(biomass_avg$se, digits = 2))

#Format table: one row per Treatment, one column per weight label
biomass_avg <- biomass_avg %>%
  select(1, 2, 5) %>%
  spread("weight", "value")

write.csv(biomass_avg, file = file.path(path_out, "out_Table1.csv"), row.names = FALSE)

#Statistics - differences in final weight before and after N_free and between treatments?
#Assess normality: Shapiro-Wilk test within every Treatment x weight group
a <- biomass %>%
  group_by(Treatment, weight) %>%
  do(tidy(shapiro.test(.$value)))
a

#Assess homogeneity of variances with F tests (var.test).
#Enriched_Digesta is removed because it only has one level (Before N_free diet).
biomass_two_levels <- subset(biomass, Treatment != "Enriched_Digesta")

#By type of treatment: before vs after the N-free diet within each Treatment
b <- biomass_two_levels %>%
  group_by(Treatment) %>%
  do(tidy(var.test(.$value ~ .$weight)))
b

#By type of weight: between the remaining treatments within each weight label
c <- biomass_two_levels %>%
  group_by(weight) %>%
  do(tidy(var.test(.$value ~ .$Treatment)))
c

#Anova
#Overall (control, enriched, enriched_nfree, before and after Nfree diet):
#one-way ANOVA of larval weight on Treatment, fitted to all rows of biomass.
#NOTE(review): this model uses Treatment only, whereas the post hoc test below
#uses the combined Treatment + weight label (conca) - confirm this is intended.
anova1 <- broom::tidy(aov(value ~ Treatment, data = biomass))
anova1

#Given that the anova showed differences among treatments, a post hoc test
#(Tukey HSD on the combined Treatment + weight label) detects which values differ
#Post hoc - Table S1
ph1 <- broom::tidy(TukeyHSD(aov(value ~ conca, data = biomass))) %>%
  select(2:7)   #drops the first column of the tidied output
ph1
write.csv(ph1, file = file.path(path_out, "out_TableS1.csv"), row.names = FALSE)



#4. Analyses Table 2------------------------------------------------------------------
# Table 2 - N and AT% content in defatted, dialysed protein, pellet and residual material

#Select larvae and residual material (columns 2-4 plus N and N15)
table2 <- total_balance %>%
  subset(Sample == "Larvae" | Sample == "Residual_material") %>%
  select(2:4, 7, 8)

#Select insoluble and soluble N fractions (same five columns)
table2_add <- larvae_balance %>%
  select(2:4, 6:7)

#Bind the two datasets and stack N / N15 into long format
table2 <- rbind(table2, table2_add)
table2 <- gather(table2, "nutrient", "value", 4:5)

#Calculate average and std error per Sample x Treatment x nutrient
table2_avg <- ddply(table2, .(Sample, Treatment, nutrient), summarize,
                    mean = mean(value, na.rm = TRUE),
                    se = std.error(value, na.rm = TRUE))

#Numeric copy, kept for Figure 2
table2_avg2 <- table2_avg

#Text cell "mean ± se" and the column label <Sample>_<nutrient>
table2_avg$mean_se <- paste0(round(table2_avg$mean, digits = 4), " ± ", round(table2_avg$se, digits = 4))
table2_avg$conca <- paste0(table2_avg$Sample, "_", table2_avg$nutrient)

#One row per Treatment, one column per <Sample>_<nutrient>
table2_avg <- table2_avg %>%
  select(7, 2, 6) %>%
  spread("conca", "mean_se")

#Order the columns by name instead of by position. The previous positional
#selection kept Larvae, Insoluble_N and Residual_material only and thereby
#dropped the two Soluble_dialysed_N (dialysed protein) columns announced in the
#table title; they are appended here, the first seven columns are unchanged.
#Indexing by name fails with an error if an expected column is absent.
table2_samples <- c("Larvae", "Insoluble_N", "Residual_material", "Soluble_dialysed_N")
table2_columns <- c("Treatment",
                    paste0(rep(table2_samples, each = 2), "_", c("N", "N15")))
table2_avg <- table2_avg[, table2_columns]

table2_avg
write.csv(table2_avg, file.path(path_out, "out_table2.csv"), row.names = FALSE)


#Figure 2
#Bars: mean per Treatment; error bars: mean ± standard error; points: the
#individual observations; one panel per nutrient x Sample.
unique(table2_avg2$Sample)

#Fix the panel order of the samples (same order for summary and raw data)
sample_order <- c("Larvae",
                  "Insoluble_N",
                  "Soluble_dialysed_N",
                  "Residual_material")
table2_avg2$Sample <- factor(table2_avg2$Sample, levels = sample_order)
table2$Sample <- factor(table2$Sample, levels = sample_order)

#The points are jittered horizontally only (height = 0), so every point stays
#at its measured value; the fixed seed makes the figure reproducible.
fig2 <- ggplot(data = table2_avg2, aes(fill = Treatment, y = mean, x = Treatment)) +
  geom_bar(position = "stack", stat = "identity") +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = .3) +
  geom_point(data = table2, aes(Treatment, value),
             position = position_jitter(width = NULL, height = 0, seed = 1),
             fill = "white") +
  facet_wrap(nutrient ~ Sample, ncol = 4, scales = "free_y") +
  theme(panel.background = element_rect(fill = "white"),
        axis.line.x = element_line(colour = "black"),
        axis.line.y = element_line(colour = "black"),
        axis.text.x = element_text(angle = 65, vjust = 1, hjust = 1)) +
  ylab("") +
  xlab("") +
  geom_text(aes(label = round(mean, digits = 1)), size = 3, position = position_stack(vjust = 1.2)) +
  scale_fill_manual(values = c("#a89689", "#f4a460", "#20a38b"))
fig2

#Save exactly this plot object rather than whatever plot was created last
ggsave(filename = "Figure2.pdf", plot = fig2, path = path_out,
       width = 20, height = 15, units = "cm", dpi = 300)


#statistical comparisons
#Rows without a value are left out of both checks
table2_complete <- subset(table2, !is.na(value))

#Assess normality: Shapiro-Wilk test per Treatment x Sample x nutrient
d <- table2_complete %>%
  group_by(Treatment, Sample, nutrient) %>%
  do(tidy(shapiro.test(.$value)))
d

#Assess homogeneity of variances (Levene's test between treatments, without
#Enriched_Digesta) - some of these were marginally significant 0.03-0.04.
e <- table2_complete %>%
  subset(Treatment != "Enriched_Digesta") %>%
  group_by(Sample, nutrient) %>%
  do(tidy(leveneTest(.$value ~ .$Treatment)))
e



#anova - supplementary table
#One-way ANOVA of value on Treatment, fitted separately per Sample x nutrient
anova2 <- table2 %>%
  group_by(Sample, nutrient) %>%
  group_modify(function(.x, .y) broom::tidy(aov(value ~ Treatment, data = .x)))
anova2

#posthoc test - supplementary table
#Tukey HSD on the same per-group models
ph2 <- table2 %>%
  group_by(Sample, nutrient) %>%
  group_modify(function(.x, .y) broom::tidy(TukeyHSD(aov(value ~ Treatment, data = .x))))
ph2

write.csv(ph2, file = file.path(path_out, "TableS2_1.csv"), row.names = FALSE)

#Given that there are only two samples of residual material (enriched and
#control), a t test was performed per nutrient.
#Only 15N is different.
residual_t <- table2 %>%
  subset(Sample == "Residual_material") %>%
  group_by(nutrient) %>%
  group_modify(function(.x, .y) broom::tidy(t.test(value ~ Treatment, data = .x)))

residual_t

write.csv(residual_t, file = file.path(path_out, "out_TableS2_2.csv"), row.names = FALSE)


#5. Analyses Table S5------------------------------------------------------------------
# Table S5 - Nutrient content of larval samples and residual material

#All samples except the starter: keep columns 1-4 plus columns 6-7 and stack
#the latter two into long format (nutrient = column name, value = content)
tableS5 <- total_balance %>%
  subset(Sample != "Starter") %>%
  select(1:4, 6:7) %>%
  gather("nutrient", "value", 5, 6)

#Add ammonia content to the df (ammonia must carry the same column names)
tableS5 <- rbind(tableS5, ammonia)

#Calculate mean values and standard error per Type x Sample x Treatment x nutrient
tableS5 <- ddply(tableS5, .(Type, Sample, Treatment, nutrient), summarize,
                 mean = mean(value, na.rm = TRUE),
                 se = std.error(value, na.rm = TRUE))

#Format the cell text: rows whose Treatment is "-" show the mean only, all
#other rows show "mean ± se" (both rounded to two decimals)
mean_2d <- round(tableS5$mean, digits = 2)
se_2d <- round(tableS5$se, digits = 2)
tableS5$mean_se <- ifelse(tableS5$Treatment == "-",
                          mean_2d,
                          paste0(mean_2d, " ± ", se_2d))

#One column per nutrient
tableS5 <- tableS5 %>%
  select(1:4, 7) %>%
  spread("nutrient", "mean_se")

#save file
write.csv(tableS5, file = file.path(path_out, "output_tableS5.csv"), row.names = FALSE)


#6. Nutrient balance nitrogen----------------------------------------------------------------

#Calculates nitrogen balance (larvae, residual material and losses). Column value is expressed in percentage.
#Steps: total N per sample = fresh weight x DM fraction x N fraction x 1000;
#one column per Sample; each output is then expressed as a percentage of the
#N in the manure (larvae corrected for the N in the starter), and losses are
#whatever remains to 100 %. Enriched_Digesta is excluded from the balance.
nitrogen <- total_balance %>%
  mutate(total_n = (Total_fresh_afterNfree.g. * (DM / 100) * (N / 100) * 1000)) %>%
  select(2:4, 9) %>%
  spread("Sample", "total_n") %>%
  mutate(
    larvae_bioconversion = ((Larvae - Starter) / (Manure)) * 100,
    residualM_bioconversion = (Residual_material / Manure) * 100,
    losses_bioconversion = 100 - (larvae_bioconversion + residualM_bioconversion)
  ) %>%
  select(1, 2, 7:9) %>%
  subset(Treatment != "Enriched_Digesta") %>%
  gather("balance", "value", 3:5)


#statistics
#normality: Shapiro-Wilk test per Treatment x balance component
n_normality <- nitrogen %>%
  group_by(Treatment, balance) %>%
  do(tidy(shapiro.test(.$value)))
n_normality

#homogeneity of variances: F test between treatments per balance component
n_homogeneity <- nitrogen %>%
  group_by(balance) %>%
  do(tidy(var.test(.$value ~ .$Treatment)))
n_homogeneity

#t_test between treatments per balance component
#Results are included in Figure 2
t_test_n_balance <- nitrogen %>%
  group_by(balance) %>%
  group_modify(function(.x, .y) broom::tidy(t.test(value ~ Treatment, data = .x)))
t_test_n_balance

#Table with average results for figure 2
#Mean and standard error per Treatment x balance component, a text version
#("mean ± se") and readable labels for the three balance components
nitrogen_avg <- nitrogen %>%
  group_by(Treatment, balance) %>%
  summarise(mean = mean(value),
            std_error = std.error(value)) %>%
  mutate(
    mean_se = paste(round(mean, digits = 0), "±", round(std_error, digits = 1)),
    balance = case_when(
      balance == "larvae_bioconversion" ~ "Larvae",
      balance == "residualM_bioconversion" ~ "Residual material",
      TRUE ~ "Losses"
    )
  )


#Figure 2A
#Stacked bars: share of the nitrogen initially provided that ends up in
#larvae, residual material and losses, per treatment; each segment is
#labelled with "mean ± standard error".

#Order position of bars
nitrogen_avg$Treatment <- factor(nitrogen_avg$Treatment, levels = c("Enriched", "Control"))
nitrogen_avg$balance <- factor(nitrogen_avg$balance, levels = c("Losses", "Larvae", "Residual material"))

#Cleaned up relative to the previous version, same resulting settings:
# - ggplot() no longer receives a `stat` argument (geom_col() needs none)
# - the empty coord_cartesian(ylim = c()) call is removed
# - repeated theme() calls are merged into one; where an element was set
#   twice, the value that previously won is kept (plot.title hjust = 0)
#NOTE(review): element_line(size = ) is deprecated in favour of linewidth in
#recent ggplot2 releases; kept because the installed version is not known here.
ggplot(nitrogen_avg, aes(y = mean, x = Treatment, fill = balance)) +
  geom_col(width = 0.85) +
  geom_text(aes(label = (paste0(round(mean, digits = 0), " ± ", round(std_error, digits = 1))),
                group = balance),
            position = position_stack(vjust = 0.5), size = 3) +
  scale_fill_manual(values = c("#FFA500", "#8FBC8F", "#808000")) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 110), breaks = seq(0, 100, by = 25)) +
  labs(y = expression("Total nitrogen initially provided (%)")) +
  xlab("") +
  ggtitle("") +
  guides(fill = guide_legend(title = "Legend")) +
  theme(strip.background = element_rect(colour = "white", fill = "white"),
        strip.text = element_text(size = 10, face = "bold", hjust = 0.5),
        strip.text.y = element_text(angle = 0),
        panel.grid.major = element_line(colour = "white", size = 0.2),
        panel.grid.minor = element_line(colour = "white", size = 0.5),
        panel.background = element_rect(fill = "white"),
        plot.title = element_text(size = 13, face = "bold", hjust = 0, vjust = 0),
        axis.text.x = element_text(size = 10, face = "plain", angle = 0, hjust = 0.5,
                                   vjust = 1, colour = "black", lineheight = 0),
        axis.title.y = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 14, face = "bold"),
        legend.position = "bottom")

#7. 15N balance and Sankey----------------------------------------------------------------

#backgroundN
#Mean 15N (at %) of the Control rows per Sample type (starter excluded); used
#below as the 15N background of the corresponding sample type
control_rows <- subset(total_balance, Treatment == "Control" & Sample != "Starter")
background_N15 <- control_rows %>%
  select(2:4, 8) %>%
  group_by(Sample) %>%
  summarise(backg_15N = mean(N15))

#15N balance among larvae, residual material and losses (calculated by difference)
#For every output sample: total 15N and the 15N expected from the control
#background are computed from fresh weight x DM x N x 15N fraction x 1000; their
#difference is the tracer 15N, expressed as a percentage of the tracer added.
#Losses are whatever remains to 100 %.
#The join key is stated explicitly (Sample) instead of relying on whichever
#column names the two tables happen to share.
#NOTE(review): tracer_added = 2.615 is presumably the amount of 15N tracer
#supplied, in the same unit as tracer_15N - confirm.
fifteen <- total_balance %>%
  subset(Sample != "Manure" & Sample != "Starter") %>%
  inner_join(background_N15, by = "Sample") %>% #average 15N (at %) of the control groups
  mutate(total_15N = (Total_fresh_afterNfree.g. * (DM / 100) * (N / 100) * (N15 / 100) * 1000)) %>%
  mutate(background_15N = (Total_fresh_afterNfree.g. * (DM / 100) * (N / 100) * (backg_15N / 100) * 1000)) %>%
  mutate(tracer_15N = total_15N - background_15N) %>%
  mutate(tracer_added = 2.615) %>%
  mutate(N15_bioconversion = (tracer_15N / tracer_added) * 100) %>%
  select(2:4, 14) %>%
  subset(Treatment != "Control") %>%
  spread("Sample", "N15_bioconversion") %>%
  mutate(Losses = 100 - (Larvae + Residual_material)) %>%
  gather("Sample", "value", 3:5) %>%
  group_by(Treatment, Sample) %>%
  summarise(mean = format(mean(value), scientific = FALSE), #avoid scientific notation (mean becomes text)
            std_error = std.error(value))

#Balance within larvae
#create df with fresh yield per replicate and DM: one row per larvae sample
#with the key tre_re ("Treatment Replicate", separated by a space) followed by
#columns 5 and 6 of total_balance
larvae_rows <- subset(total_balance, Sample == "Larvae")
fresh_yield <- larvae_rows %>%
  mutate(tre_re = paste(Treatment, Replicate)) %>%
  select(9, 5, 6)

#Create df with 15N background content: mean 15N (at %) of the Control rows
#per N fraction (Sample) of the protein extraction
control_fractions <- subset(larvae_balance, Treatment == "Control")
protback_N15 <- control_fractions %>%
  select(2:4, 7) %>%
  group_by(Sample) %>%
  summarise(b_15N = mean(N15))

#Create dataset with 15N in whole larvae to calculate soluble N & losses
#Same tracer calculation as for the overall 15N balance, but kept per replicate
#and for the larvae of the non-control treatments only. The result holds the
#key tre_re (Treatment and Replicate pasted WITHOUT separator) and the share of
#the added tracer recovered in the whole larvae (N15_bioconversion, %).
#The join key is stated explicitly (Sample) instead of relying on whichever
#column names the two tables happen to share.
whf <- total_balance %>%
  subset(Sample != "Manure" & Sample != "Starter") %>%
  inner_join(background_N15, by = "Sample") %>% #average 15N (at %) of the control groups
  mutate(total_15N = (Total_fresh_afterNfree.g. * (DM / 100) * (N / 100) * (N15 / 100) * 1000)) %>%
  mutate(background_15N = (Total_fresh_afterNfree.g. * (DM / 100) * (N / 100) * (backg_15N / 100) * 1000)) %>%
  mutate(tracer_15N = total_15N - background_15N) %>%
  mutate(tracer_added = 2.615) %>%
  mutate(N15_bioconversion = (tracer_15N / tracer_added) * 100) %>%
  mutate(tre_re = paste0(Treatment, Replicate)) %>%
  subset(Treatment != "Control" & Sample == "Larvae") %>%
  select(15, 14)

#15N balance within larvae
#Tracer 15N recovered in each extracted N fraction, as a percentage of the
#tracer added: fresh weight x DM x extraction yield x N x 15N fraction x 1000,
#minus the same quantity computed with the control background (b_15N).
#Soluble N is obtained by difference (whole larvae - insoluble N) and dialysis
#losses as soluble N - soluble dialysed N.
#All join keys are stated explicitly. Note that two different keys are both
#called tre_re: the first is pasted WITH a space (matches fresh_yield), the
#second is rebuilt WITHOUT a separator (matches whf).
larvae_fifteen <- larvae_balance %>%
  mutate(tre_re = paste(Treatment, Replicate)) %>%
  inner_join(fresh_yield, by = "tre_re") %>%
  inner_join(protback_N15, by = "Sample") %>%
  mutate(total_15N = (Total_fresh_afterNfree.g. * (DM / 100) * (Yield / 100) * (N / 100) * (N15 / 100) * 1000)) %>%
  mutate(background_15N = (Total_fresh_afterNfree.g. * (DM / 100) * (Yield / 100) * (N / 100) * (b_15N / 100) * 1000)) %>%
  mutate(tracer_15N = total_15N - background_15N) %>%
  mutate(tracer_added = 2.615) %>%
  mutate(N15_bioconversion = (tracer_15N / tracer_added) * 100) %>%
  select(2:4, 16) %>%
  subset(Treatment != "Control") %>%
  spread("Sample", "N15_bioconversion") %>%
  mutate(tre_re = paste0(Treatment, Replicate)) %>%
  inner_join(whf, by = "tre_re") %>% #adds N15_bioconversion of the whole larvae
  mutate(Soluble_N = (N15_bioconversion - Insoluble_N)) %>%
  mutate(Dialysis_losses = Soluble_N - Soluble_dialysed_N) %>%
  select(1, 2, 3, 7, 4, 8) %>%
  gather("Sample", "value", 3:6) %>%
  group_by(Treatment, Sample) %>%
  summarise(mean = mean(value),
            std_error = std.error(value))



#this is used to create the dataset for sankey
#NOTE(review): the flow values and the "mean ± se" node labels below are typed
#in by hand, presumably from the 15N balances computed above - update them
#whenever the data change.

#One row per flow: origin, destination and share of the tracer (%)
from <- c("tracer_15N", "tracer_15N", "tracer_15N", "Larvae", "Larvae", "Soluble N", "Soluble N")
to <- c("Losses", "Larvae", "Residues", "Soluble N", "Insoluble N", "Dialysed protein N", "Dialysis losses")
value <- c(76.63, 12.81, 10.55, 2.87, 9.95, 2.17, 0.69)

links <- data.frame(from, to, value)

#sankeyNetwork() identifies nodes by their zero-based row position in `nodes`;
#the positions are looked up in one shared vector so that source and target
#indices cannot drift apart from the node order.
node_order <- c("tracer_15N", "Losses", "Larvae", "Residues", "Soluble N",
                "Insoluble N", "Dialysed protein N", "Dialysis losses")
links$source <- match(links$from, node_order) - 1
links$type <- match(links$to, node_order) - 1

#Node labels, in the same order as node_order (the stray "(" in the Losses
#label has been removed)
nodes <- data.frame(name = c("tracer_15N",
                             "Losses 76.6 ± 0.95 %",
                             "Larvae 12.8 ± 0.84 %",
                             "Residues 10.5 ± 0.31 %",
                             "Soluble N 2.9 ± 0.69 %",
                             "Insoluble N 9.9 ± 0.28 %",
                             "Dialysed protein N 2.1 ± 0.17 %",
                             "Dialysis losses 0.69 ± 0.73 %"))
nodes$val <- seq_len(nrow(nodes)) - 1

sankeyNetwork(Links = links, Nodes = nodes,
              Source = "source", Target = "type",
              Value = "value", NodeID = "name",
              fontSize = 13, nodeWidth = 7, nodePadding = 15, sinksRight = FALSE, height = 400, width = 700)

#Save the diagram as html first (manually), then run the line below to convert
#the html into a pdf, which can be post-edited in Inkscape or Adobe Illustrator
#webshot("out_sankey.html", "Sankey_Fig.pdf", delay = 0.2)


