rm(list = ls()) #script of graphs and statistics of the comparison between algorithms library("ggplot2") library("dplyr") library("Rmisc") # Read data base: Geographical comparison data<-read.csv("/media/claudia/Tesis/Capitulo1/requisito_2019/analisis_general/analysis_comparation_2024.csv") #data base geographical comparison head(data) # Values for metric ----------------------------------------------------- IO<-cbind.data.frame (value=data$IO, metric="IO", sp=data$sp, specie=data$specie, model=data$model, period=data$period, direction=data$direction) FPR<-cbind.data.frame (value=data$FPR, metric="FPR", sp=data$sp, specie=data$specie, model=data$model, period=data$period, direction=data$direction) FNR<-cbind.data.frame (value=data$FNR, metric="FNR", sp=data$sp, specie=data$specie, model=data$model, period=data$period, direction=data$direction) TSS <- cbind.data.frame (value=data$TSS, metric="TSS", sp=data$sp, specie=data$specie, model=data$model, period=data$period, direction=data$direction) #Statistics sum_TSS <- summarySE(TSS,measurevar="value", groupvars=c("model","direction")) sum_IO <- summarySE(IO,measurevar="value", groupvars=c("model","direction")) sum_FPR <- summarySE(FPR,measurevar="value", groupvars=c("model","direction")) sum_FNR <- summarySE(FNR,measurevar="value", groupvars=c("model","direction")) ?summarySE stast_metrics<-rbind.data.frame((cbind(sum_TSS,metric="TSS")), (cbind(sum_IO,metric="IO")), (cbind(sum_FNR,metric= "FNR")), (cbind(sum_FPR,metric="FPR"))) head(stast_metrics) #write.csv(stast_metrics, "/media/claudia/Tesis/Capitulo1/requisito_2019/analisis_general/estadisticas_metrics_2024.csv") #For plots base<-rbind.data.frame(TSS,IO,FPR,FNR) unique (base$metric) base1<-rbind.data.frame(TSS,IO,FPR,FNR) head(base) unique(data$model) #------------Plot Figure 1 #------------Bar Plot library(ggthemes) pd=position_dodge(width=0.5) panel_labels<- c("bio" ="Bioclim", "brt"="BRT", "gam"="GAM", "glm"="GLM", "max"="MaxEnt", "rf"="RF", "svm"="SVM") panel_labels2<- c("TSS" ="True Skill Statistic (TSS)", "IO"= "Overlap Index (OI)", "FPR"="False Positive Rate (FPR)", "FNR"="False Negative Rate (FNR)" ) barplot<- ggplot(base1, aes(x= model, y = value , fill = direction)) + facet_wrap(~metric, labeller = labeller(metric = panel_labels2))+ theme(legend.position = "bottom")+ scale_x_discrete("Model", labels=c("bio" ="Bioclim", "brt"="BRT", "gam"="GAM", "glm"="GLM", "max"="MaxEnt", "rf"="RF", "svm"="SVM"))+ stat_boxplot(geom = "errorbar", width=0.2, position = pd)+ geom_boxplot(width=0.5, position=pd)+theme(legend.position = "bottom")+ scale_fill_manual(name="Direction",values=c( "#56B4E9", "#E69F00"))+theme_base()+ theme(axis.text.x = element_text(size = 12, angle = 40))+ ylab("Value") barplot #------------Plot Figure 1 head(base1) pd1 = position_dodge(0.2) pd2 = position_dodge(0.65) #function that outputs mean, lower limit and upper limit of 95% CI data_summary <- function(x) { m <- mean(x, na.rm=TRUE) sem <-sd(x, na.rm=TRUE)/sqrt(sum(!is.na(x))) ymin<-m-1.96*sem ymax<-m+1.96*sem return(c(y=m,ymin=ymin,ymax=ymax)) } ggplot(base1, aes(x = model, y = value, fill=direction, color=direction, shape=direction))+ geom_dotplot(binaxis='y', stackdir='center',stackratio=1.2, dotsize=0.8, binwidth=0.02, position=pd2,alpha = 0.5 ) + stat_summary(fun.data=data_summary, position=pd1, geom="errorbar", width=0.05) + stat_summary(fun.data=data_summary, position=pd1, geom="point", size=2) + scale_fill_manual(name="Direction",values=c( "#FF4500", "#0000EE")) + scale_color_manual(name="Direction",values=c( "#FF4500", "#0000EE")) + scale_shape_manual(name="Direction",values = c(15,17)) + theme_base()+facet_wrap(~metric, labeller = labeller(metric = panel_labels2)) + scale_x_discrete("Model", labels=c("bio" ="Bioclim", "brt"="BRT", "gam"="GAM", "glm"="GLM", "max"="Maxent", "rf"="RF", "svm"="SVM"))+ ylab("Value") + theme(axis.text.x = element_text(size = 16)) #---------------------------------STATISTICS------------------------------------ #-------------------------COMPARISON BETWEEN ALGORITHMS--------------------------------- library("coin") #Algorithmic comparison between direction: Wilcox TEST head(base) unique(base$model) #TSS #Change metric (IO, FNR, FPR) wmax<-subset(base, metric=="TSS" & model=="max") w_max<-wilcox.test(value~direction, data=wmax) wmax1<-cbind.data.frame(W=w_max$statistic, p.value=w_max$p.value, metodo=w_max$method, model="max", metric="TSS") head(wmax1) wbio<-subset(base, metric=="TSS" & model=="bio") w_bio<-wilcox.test(value~direction, data=wbio) wbio1<-cbind.data.frame(W=w_bio$statistic, p.value=w_bio$p.value, metodo=w_bio$method, model="bio", metric="TSS") wbrt<-subset(base, metric=="TSS" & model=="brt") w_brt<-wilcox.test(value~direction, data=wbrt) wbrt1<-cbind.data.frame(W=w_brt$statistic, p.value=w_brt$p.value, metodo=w_brt$method, model="brt", metric="TSS") wrf<-subset(base, metric=="TSS" & model=="rf") w_rf<-wilcox.test(value~direction, data=wrf) wrf1<-cbind.data.frame(W=w_rf$statistic, p.value=w_rf$p.value, metodo=w_rf$method, model="rf", metric="TSS") wglm<-subset(base, metric=="TSS" & model=="glm") w_glm<-wilcox.test(value~direction, data=wglm) wglm1<-cbind.data.frame(W=w_glm$statistic, p.value=w_glm$p.value, metodo=w_glm$method, model="glm", metric="TSS") wgam<-subset(base, metric=="TSS" & model=="gam") w_gam<-wilcox.test(value~direction, data=wgam) wgam1<-cbind.data.frame(W=w_gam$statistic, p.value=w_gam$p.value, metodo=w_gam$method, model="gam", metric="TSS") wsvm<-subset(base, metric=="TSS" & model=="svm") w_svm<-wilcox.test(value~direction, data=wsvm) wsvm1<-cbind.data.frame(W=w_svm$statistic, p.value=w_svm$p.value, metodo=w_svm$method, model="svm", metric="TSS") w_tss<-rbind.data.frame(wbio1, wbrt1, wgam1, wglm1, wmax1, wrf1, wsvm1) #write.csv(w_tss, "C:/Users/Vertebrados/Google Drive/Tesis/cap1_capacidad predictiva cc/analisis_2021/w_tss.csv") #Comparison between algorithms by direction #Change metric (IO, FNR, FPR) #TSS-FORECAST ktssF<-subset(base, metric=="TSS" & direction=="Forecast") k_tssF<-kruskal.test(value~model, data=ktssF) ktss1F<-cbind.data.frame(estadistica=k_tssF$statistic, df=k_tssF$parameter,p.value=k_tssF$p.value, metric="TSS", direction="Forecast") pw_tssF<-posthoc.kruskal.nemenyi.test(ktssF$value, ktssF$model, dist = "Tukey") pwtssF<-cbind.data.frame(pw_tssF$p.value, metric="TSS", direction="Forecast", estadistico="Nemenyi test") #write.csv(ktss1F,"C:/Users/Vertebrados/Google Drive/Tesis/cap1_capacidad predictiva cc/analisis_2021/kruskal/tss_forecast_2024.csv") #write.csv(pwtssF,"C:/Users/Vertebrados/Google Drive/Tesis/cap1_capacidad predictiva cc/analisis_2021/wilcox_k/tss_forecast_2024.csv") #TSS-Hindcast ktssH<-subset(base, metric=="TSS" & direction=="Hindcast") k_tssH<-kruskal.test(value~model, data=ktssH) ktss1H<-cbind.data.frame(estadistica=k_tssH$statistic, df=k_tssH$parameter,p.value=k_tssH$p.value, metric="TSS", direction="Hindcast") pw_tssH<-PMCMRplus::kwAllPairsNemenyiTest(ktssH$value, ktssH$model, dist = "Tukey") pwtssH<-cbind.data.frame(pw_tssH$p.value, metric="TSS", direction="Hindcast", estadistico="Nemenyi test") #write.csv(ktss1H,"C:/Users/Vertebrados/Google Drive/Tesis/cap1_capacidad predictiva cc/analisis_2021/kruskal/tss_Hindcast_2024.csv") #write.csv(pwtssH,"C:/Users/Vertebrados/Google Drive/Tesis/cap1_capacidad predictiva cc/analisis_2021/wilcox_k/tss_Hindcast_2024.csv")