######################################################## #1) Change in colony strength and bee diseases over time? mod2 <- lm(Adult ~ Colony + INTSeason , data = diseases) mod3 <- lm(Adult ~ Colony + INTSeason + Farm:Colony, data = diseases) #repeat linear regression analysis steps 2-4 for more appropriate model, then summary(mod2) #or summary(mod3) #different way to look at data over time individually install.packages("ggpubr") # Visualize the data: colony strength over the season # Plot weight by group and color by group library("ggpubr") library(ggplot2) #This is for the before and after comparisons of adult frames 2019 g<- ggboxplot(diseases, x = "Bloom", y = "Adult", color = "Bloom", palette = c("#0C4893", "#0C4893"), ylab = "Colony Strength (Adult Frames)", xlab = "Bloom") g+geom_dotplot(binaxis='Adult', stackdir='center', dotsize=1)+ theme_light()+theme(legend.position = "none", axis.text=element_text(size=24, family = "Times"), axis.title.x=element_text(size=28,face="bold", family= "Times"), axis.title.y=element_text(size=28,face="bold", family= "Times"))+ geom_hline(yintercept=6, linetype='dotted', col = 'black') ##2020 diseases h <- ggboxplot(bees2, x = "Season", y = "Adult", color = "Season", palette = c("#A4CFF7", "#0C4893"), ylab = "Colony strength (adult frames)", xlab = "2020 Season") h+theme(axis.text=element_text(size=12), axis.title=element_text(size=14,face="bold")) # Wilcoxon two sample test for colony strength different beginning and end of season res <- wilcox.test(Adult ~ Season, data = diseases, exact = FALSE) res res1 <- wilcox.test(Adult ~ Season, data = strength, exact = FALSE, alternative = "less") res1 res2 <- wilcox.test(Adult ~ Season, data = strength, exact = FALSE, alternative = "greater") res2 # Looks like we fail to reject the null in all three cases, so adult strength didn't change # change numeric columns to factors with value labels diseases$INTSeason <- factor(diseases$INTSeason,levels=c(0,1), labels=c("Early","Late")) diseases$EFB.Severity <- factor(diseases$EFB.Severity,levels=c(0,1,2,3), labels=c(0,1,2,3)) ##Are EFB ratings higher at the end of the season? res3 <- wilcox.test(EFB.Severity ~ Season, data = diseases, exact = FALSE, alternative = "less") res3 res2020 <- wilcox.test(EFB.Severity ~ Season, data = diseasesplus2020, exact = FALSE, alternative = "less") res2020 res4 <- wilcox.test(Diseases ~ Season, data = diseases, exact = FALSE, alternative = "less") res4 # Results = higher EFB ratings at the end of the season # Graphs for disease data library(ggplot2) ##Graph showing number of diseases before and after pollination b <- ggplot(diseases, aes(x=Bloom, y=Diseases, fill=Farm)) + geom_boxplot() + geom_dotplot(binaxis='y', binwidth=.1, stackdir='center', dotsize=.4, position=position_dodge(.75))+ labs(x ="2019 Bloom ", y = "Number of diseases (per colony)") b + scale_fill_brewer(palette="Blues") + theme_classic() + theme(axis.text=element_text(size=12), axis.title=element_text(size=14,face="bold")) ##EFB severity graphs #EFB levels, early and late, 2019 d <- ggplot(diseases, aes(x=Bloom, y=EFB.Severity, fill=Farm)) + geom_bar(mapping=aes(y=EFB.Severity, x=Bloom), stat = "identity", position="dodge") + geom_dotplot(binaxis='y', stackdir='center', binpositions="all", dotsize=.4, binwidth = 0.1, position=position_dodge(0.9))+ labs(x="Bloom", y = "EFB Severity Rating (Per Colony)") d1<- d + scale_fill_brewer(palette="Blues") + theme_classic()+ theme(axis.text.y=element_text(size=30, family = "Times"), axis.text.x=element_text(size=30, family = "Times"), axis.title.y=element_text(size=30,face="bold", family = "Times"), axis.title.x=element_text(size=30,face="bold", family = "Times"), legend.text=element_text(size=20, family = "Times"), legend.title=element_text(size=20, face= "bold", family = "Times"))+ geom_text(x="Early", y=3, label="2019", family = "Times New Roman", size= 16) d1 #2020 EFB severity library(plyr) revalue(bees2$Season, c("Beginning"="Early", "End"="Late"))->bees2$Season d2020 <- ggplot(bees2, aes(x=Season, y=EFB.Severity, fill=Farm)) + geom_bar(mapping=aes(y=EFB.Severity, x=Season), stat = "identity", position="dodge") + geom_dotplot(binaxis='y', stackdir='center', binpositions="all", dotsize=.4, binwidth = 0.1, position=position_dodge(0.9))+ labs(x="Bloom", y=" ") d12020 <- d2020 + scale_fill_brewer(palette="Blues") + theme_classic()+theme(legend.position = "none")+ theme(axis.text.y=element_text(size=30, family = "Times"), axis.text.x=element_text(size=30, family = "Times"), axis.title=element_text(size=30,face="bold", family = "Times"),legend.text=element_text(size=20, family = "Times"), legend.title=element_text(size=20, face= "bold", family = "Times"))+ geom_text(x="Early", y=3, label="2020", family = "Times New Roman", size= 16) d12020 ##Combine both EFB graphs side by side require(gridExtra) grid.arrange(d1, d12020, ncol=2) ###### #Graphs using frames converted into sq inches library(ggplot2) # Simple scatter plot sp <- ggplot(data=bees, aes(x=Colony, y=Adult.Area)) + geom_point() # Change line type and color sp + geom_hline(yintercept=2700, linetype="dashed", color = "orange") sp1 <- ggplot(data=bees, aes(x=Colony, y=Brood.Area)) + geom_point() # Change line type and color sp1 + geom_hline(yintercept=1620, linetype="dashed", color = "orange") #t test t.test(bees$Brood.Area, mu = 1620, alternative = "less", conf.level = .95) #################################################### # 2) Do variety and/or colony strength improve yield? # Loading packages and functions library (ggplot2) library(jtools) library(plyr) library(reshape2) library(corrplot) library(beeswarm) library(PerformanceAnalytics) #####THIS IS FOR 2019 # # # # ### Load in datasets (for andony) #berry.data has yield data berry.data <- read.csv("/Users/andonymelathopoulos/Desktop/berrydata.CSV",header=TRUE) #visitation.data has records of bee visitation to bushes within fields - a measure of pollinator activity on bushes visitation.data <- read.csv("/Users/andonymelathopoulos/Desktop/visitation.CSV",header=TRUE) #apiary data tells how strong colonies were at the different locations apiary.data <- read.csv("/Users/andonymelathopoulos/Desktop/apiary.CSV",header=TRUE) ###(for kennedy) ##I noticed PanAm is spelled differently in the apiary data from other datasets, so I am going to correct it so the values are consistent with the other dataframes revalue(newflight$Farm, c("Pan Am"="PanAm"))->newflight$Farm ## Another glitch is in how Liberty is spelled in visitation vs apiary data revalue(visitation$Variety, c("Liberty "="Liberty"))->visitation$Variety ## Also Eden in berry data has an extra space revalue(berry$Farm, c("Eden "="Eden"))->berry$Farm ### Summarize data - the experimental unit in this experiment is assumed to be the field (although ### in the future we may consider using some kind of nested design to capture within field variability ### although on passing glance this variation seems small). So I want to get data summarized down using ### plyr and then I am going to attempt to merge the three datasets into one for analysis. That will be ### my next step after graphing some of the basic field level data. #kennedy: I have files named differently than yours, so that is reflected in this script. i have your original if needed berry.summary<-ddply(berry, c("Farm","Variety"), summarise, fset = mean(FRUITSET,na.rm=TRUE), canes = mean(X..of.canes,na.rm=TRUE), clstrcane = mean(AVG.clusters.per.cane,na.rm=TRUE), bryclstr = mean(Avg..Berries.per.cluster,na.rm=TRUE), brywt = mean(Avg.berry.weight..g.,na.rm=TRUE)) #Calculate the flowers per acre - because I am curious if some fields have a lot more flowers per acre for the bees berry.summary$flwrs<-berry.summary$bryclstr*berry.summary$canes*berry.summary$clstrcane/(berry.summary$fset/100) #For yield I used 2000 bushes per acre and I used a constant of 0.00220462 to convert from g/acre to lbs/acre berry.summary$yield<-berry.summary$bryclstr*berry.summary$canes*berry.summary$clstrcane*berry.summary$brywt*2000*0.00220462 #let me clear some of the varriables out berry.summary<-subset(berry.summary, select=c("Farm", "Variety", "fset", "flwrs", "brywt", "yield")) #Let me move to the vistation data visitation.summary<-ddply(visitation, c("Farm","Variety", "Crop.Stage"), summarise, HBvisit = mean(Honey.Bee.Visits,na.rm=TRUE), BBvisit = mean(Bumble.Bee.Visits,na.rm=TRUE)) #Next I am going to rotate the data frame so crop stage are turned into columns and make one for HB and one for BB HBvisits<-dcast(visitation.summary, Farm + Variety ~ Crop.Stage, value.var = "HBvisit") BBvisits<-dcast(visitation.summary, Farm + Variety ~ Crop.Stage, value.var = "BBvisit") #Focus on early and full bloom because I assume these are most predictive. I will average across the two dates, but discard where no data was recorded HBvisits$HBmean<-rowMeans(HBvisits[,c("First Bloom", "Full Bloom","Petal Fall")], na.rm=TRUE) HBvisits<-subset(HBvisits, select=c("Farm", "Variety", "HBmean")) BBvisits$BBmean<-rowMeans(BBvisits[,c("First Bloom", "Full Bloom", "Petal Fall")], na.rm=TRUE) BBvisits<-subset(BBvisits, select=c("Farm", "Variety", "BBmean")) #join BB and HB data Bvisit<-merge(HBvisits, BBvisits, by=c("Farm","Variety")) #moving onto the apiary strength apiary.summary<-ddply(newflight, c("Farm","Variety"), summarise, Flight = mean(Flight,na.rm=TRUE)) #now merge everything together temp<-merge(Bvisit, apiary.summary, by=c("Farm","Variety"), all=TRUE) alldata<-merge(temp, berry.summary, by=c("Farm","Variety"), all=TRUE) write_csv(alldata, path = "alldata.csv") ### Lets examine some coarse relationships ## for sig tests and p values resyield1 <- cor.mtest(newcorr2019, conf.level = .95) resyield2 <- cor.mtest(mtcars, conf.level = .99) N #changes font in corrgram to times new roman par(family="Times") colnames(alldata.correlation) <-c("HB.visits","BB.visits","Flight","Fruit.set","Flowers","Berry.wt" ,"Yield" ) alldata <- rename(alldata,c('Berry.wt'='Berry.weight')) ##for 2019 correlogram alldata.correlation<-alldata[, c("HBmean","BBmean","Flight","fset","flwrs","brywt" ,"yield" )] newcorr2019 <- cor(alldata.correlation, use = "complete.obs", method=c("kendall")) y <- corrplot.mixed(corr = newcorr2019, upper = "ellipse", lower = "number", tl.col = "black", tl.cex = 1.5, cl.cex = 1.3, number.cex=1.5) require (GGally) library (GGally) #shows scatterplot and distribution for 2019 yield ggpairs(alldata.correlation) ###################################### # # # # # #THIS IS FOR 2020 ### Load in datasets #berry.data has yield data berry2020 <- read.csv("/Users/kennedygrant/Downloads/2020berry.CSV",header=TRUE) #visitation.data has records of bee visitation to bushes within fields - a measure of pollinator activity on bushes visitation2020 <- read.csv("/Users/kennedygrant/Downloads/2020visitation.CSV",header=TRUE) #apiary data tells how strong colonies were at the different locations apiary2020 <- read.csv("/Users/kennedygrant/Downloads/newflight2020.CSV",header=TRUE) berry2020$AVG.clusters.per.cane <- as.numeric(as.character(berry2020$AVG.clusters.per.cane)) berry.summary2020 <-ddply(berry2020, c("Farm","Variety"), summarise, fset = mean(FRUIT.SET,na.rm=TRUE), canes = mean(X..of.canes,na.rm=TRUE), clstrcane = mean(AVG.clusters.per.cane,na.rm=TRUE), bryclstr = mean(Avg..Berries.per.cluster,na.rm=TRUE), brywt = mean(Avg.berry.weight..g.,na.rm=TRUE)) #Calculate the flowers per acre - because I am curious if some fields have a lot more flowers per acre for the bees berry.summary2020$flwrs<-berry.summary2020$bryclstr*berry.summary2020$canes*berry.summary2020$clstrcane/(berry.summary2020$fset/100) #For yield I used 2000 bushes per acre and I used a constant of 0.00220462 to convert from g/acre to lbs/acre berry.summary2020$yield<-berry.summary2020$bryclstr*berry.summary2020$canes*berry.summary2020$clstrcane*berry.summary2020$brywt*2000*0.00220462 #let me clear some of the variables out berry.summary2020<-subset(berry.summary2020, select=c("Farm", "Variety", "fset", "flwrs", "brywt", "yield")) #Let me move to the vistation data visitation.summary2020<-ddply(visitation2020, c("Farm","Variety", "Crop.Stage"), summarise, HBvisit = mean(Honey.Bee.Visits,na.rm=TRUE), BBvisit = mean(Bumble.Bee.Visits,na.rm=TRUE)) #Next I am going to rotate the data frame so crop stage are turned into columns and make one for HB and one for BB HBvisits<-dcast(visitation.summary2020, Farm + Variety ~ Crop.Stage, value.var = "HBvisit") BBvisits<-dcast(visitation.summary2020, Farm + Variety ~ Crop.Stage, value.var = "BBvisit") #Focus on early and full bloom because I assume these are most predictive. I will average across the two dates, but discard where no data was recorded HBvisits$HBmean<-rowMeans(HBvisits[,c("First Bloom", "Full Bloom","Petal Fall")], na.rm=TRUE) HBvisits<-subset(HBvisits, select=c("Farm", "Variety", "HBmean")) BBvisits$BBmean<-rowMeans(BBvisits[,c("First Bloom", "Full Bloom", "Petal Fall")], na.rm=TRUE) BBvisits<-subset(BBvisits, select=c("Farm", "Variety", "BBmean")) #join BB and HB data Bvisit<-merge(HBvisits, BBvisits, by=c("Farm","Variety")) #moving onto the apiary strength apiary.summary2020<-ddply(apiary2020, c("Farm","Variety"), summarise, Flight = mean(Flight,na.rm=TRUE)) #now merge everything together temp2020<-merge(Bvisit, apiary.summary2020, by=c("Farm","Variety"), all=TRUE) alldata2020<-merge(temp2020, berry.summary2020, by=c("Farm","Variety"), all=TRUE) write_csv(alldata2020, path = "alldata2020.csv") ### Lets examine some coarse relationships ##preliminary correlation chart alldata2020.correlationnocl<-alldata2020[, c("HB.visits","BB.visits","Flight","Fruit.set","Flowers","Berry.wt" ,"Yield" )] chart.Correlation(alldata2020.correlation, histogram=TRUE, pch=19) #changes font in corrgram to times new roman par(family="Times") library(dplyr) colnames(alldata2020.correlationnocl) <-c("HB.visits","BB.visits","Flight","Fruit.set","Flowers","Berry.wt" ,"Yield" ) #name of berry weight was too long alldata2020.correlationnocl <- rename(alldata2020.correlationnocl,c('Berry.wt'='Berry.weight')) # 2020 corrgram for yield new2020yieldcorr <- cor(alldata2020.correlationnocl, use = "complete.obs", method=c("kendall")) # notice how we are ignoring missing data with the last argument # plot the correlation matrix x <- corrplot.mixed(corr = new2020yieldcorr, upper = "ellipse", lower = "number", tl.col = "black", tl.cex = 1.5, cl.cex = 1.3, number.cex=1.5) par(mfrow=c(1,2)) corrplot.mixed(corr = newcorr2019, upper = "ellipse", lower = "number", tl.col = "black", tl.cex = 1.3, cl.cex = 1.3, number.cex=1.5) corrplot.mixed(corr = new2020yieldcorr, upper = "ellipse", lower = "number", tl.col = "black", tl.cex = 1.3, cl.cex = 1.3, number.cex=1.5) ##################Plotting yield on flight ##Is variety or year necessary in yield graph? lma <- lm(Yield ~ Flight + Variety + Year, data = alldatabothyears) summary(lma) ####neither are significant ##this summary also gives quartiles for yield lmaa <- lm(Flight ~ Yield, data = alldatabothyears) summary(lmaa) quantile(as.numeric(alldatabothyears$Flight), na.rm=TRUE) ##Yield linear graph, one line, both years, variety denoted in legend ggplot(alldatabothyears, aes(x = Flight, y = Yield)) + geom_point(aes(colour = Variety, shape=Variety), size=4) + geom_smooth(method = "lm", col= "black")+labs( x="Incoming Flight (bees/minute)", y="Yield Estimate (kg/ha)")+ theme_light()+theme(text = element_text(family = "Times", size = 20), axis.text=element_text(size=18, family = "Times"), axis.title.x = element_text(size=20, face = "bold"), axis.title.y = element_text(size=20, face = "bold"), legend.title=element_text(face= "bold", size = 18))+ geom_vline(xintercept=38, linetype='dotted', col = 'black')+ scale_color_manual(values=c('#A4CFF7', '#25619A')) # in 2019 there was a strong relationship with the number of flowers and yield. #but also the lack of a relationship between HBmeans and fset and yield (although a strong relationship between BBmean and fset) #lets look at fset and apiary.yield.fit<-lm(brywt~Flight, data=alldata) summ(apiary.yield.fit) apiary.yield.fit<-lm(yield~Flight, data=alldata) summ(apiary.yield.fit) apiary.fset.fit<-lm(fset~Flight, data=alldata) summ(apiary.fset.fit) #lets look at the residuals pred <- fitted(apiary.yield.fit) res <- residuals(apiary.yield.fit) plot(pred, res, pch = 19 , col = " orange ", main = "Residual Plot: Residuals vs. Predicted Values", xlab = "Predicted Value", ylab = "Residuals") abline(h=0) qqnorm(residuals(apiary.yield.fit), pch = 19 , col = " orange ", main = "Normal Probability Plot of Residuals") qqline(residuals(apiary.yield.fit)) ##################################### # 3) Can the adult population of bees be predicted by flight, cluster, QR/QLESS? # Step 1: # merge two data frames by two factors (2019) total <- merge(flight,bees,by=c("Colony","Bloom")) pairs(~ Adult + Brood + Flight + Flight*Queen, data = total, pch = 19 , lower.panel = NULL,main = "Colony strength") cor(total$Brood, total$Adult) cor(total$Flight, total$Adult) cor(total$Queen, total$Adult) ## AIC for flight and temp lm1 <- lm(Adult ~ Flight, data = total) AIC(lm1) stopifnot(all.equal(AIC(lm1), AIC(logLik(lm1)))) BIC(lm1) lm2 <- lm(Adult ~ Flight + Flight.Temp, data = total) AIC(lm1, lm2) BIC(lm1, lm2) #temperature has a slightly better fit according to AIC summary(lm1) summary(lm2) # summary indiciates that the addition of flight in lm2 has a p val of greater than .05, I chose not to use it for the residuals ####RESIDUALS # Step 2: pred <- fitted(lm1) res <- residuals(lm1) plot(pred, res, pch = 19 , col = " orange ", main = "Residual Plot: Residuals vs. Predicted Values", xlab = "Predicted Value", ylab = "Residuals") abline(h=0) # Optional: normal qq plot qqnorm(residuals(lm1), pch = 19 , col = " orange ", main = "Normal Probability Plot of Residuals") qqline(residuals(lm1)) ## GRAPH: Adult and flight linear regression with temperature noted on scatterplot library(ggplot2) ##2019 version lm3 <- ggplot(total,aes(y=Adult,x=Flight,fill=Flight.Temp, color=Flight.Temp)) lm3 +geom_point()+stat_smooth(method="lm",se=TRUE)+labs(fill = "Temperature (F)", x="Incoming Flight(bees/minute)", y="Frames of Adult Bees", title = "2019")+theme( plot.title = element_text(size=16, face="bold"), axis.title.x = element_text(size=12), axis.title.y = element_text(size=12)) lm3 +geom_point()+stat_smooth(method="lm",se=TRUE)+labs(fill = "Temperature (F)", x="Incoming Flight (bees per minute)", y="Frames of Adult Bees", title = "2019")+theme(plot.title = element_text(size=20, face="bold"), axis.title.x = element_text(size=12),axis.title.y = element_text(size=12), axis.text = element_text(size = 20))+ scale_x_continuous(name="Incoming Flight (bees per minute)", limits=c(10, 146)) + scale_y_continuous(name="Frames of Adult Bees", limits=c(0, 20)) #Fun interactive multi line graph depending on temp ggPredict(lm2,interactive=TRUE) ##2020 data total2020 <- merge(flight2noav,bees2,by=c("Colony","Bloom")) lm4 <- ggplot(total2020,aes(y=Adult,x=Flight..bees.per.minute.,fill=Flight.Temp, color=Flight.Temp)) lm4 +geom_point()+stat_smooth(method="lm",se=TRUE)+labs(fill = "Temperature (F)", x="Incoming Flight (bees per minute)", y="Frames of Adult Bees")+theme( axis.title.x = element_text(size=12),axis.title.y = element_text(size=12), axis.text = element_text(size = 20)) ###Do you need year in flight graph? bothyearflight$Year <- factor(bothyearflight$Year,levels=c(2019,2020), labels=c("2019","2020")) lmb <- lm(Adult ~ Flight, data = bothyearflight) summary(lmb) lmc <- lm(Adult ~ Flight + Year, data = bothyearflight) summary(lmc) lmd <- lm(Adult ~ Flight + Flight.Temp, data = bothyearflight) lme <- lm(Adult ~ Flight*Year, data = bothyearflight) summary(lme) # Compare to more complex models anova(lmb, lmc) anova(lmb, lmd) ###flight temp not stat significant, but year is anova(lmb, lme) ###interaction between flight and year also significant lmf <- lm(Adult ~ Flight + Year + Flight*Year + Flight.Temp, data = bothyearflight) summary(lmf) anova(lmb, lmf) ###however, when all togehter, both are insignificant, and were thus removed #change flight temp to metric bothyearflight <- transform(bothyearflight, Flight.Temp.C = (Flight.Temp-32)*(9/5)) # Final model for both years lmboth <- ggplot(bothyearflight,aes(y=Adult,x=Flight, fill = Flight.Temp.C))+geom_point(aes(color=Flight.Temp.C)) lmboth +guides(color=FALSE)+stat_smooth(method="lm",se=TRUE, col="black")+labs(fill = "Temperature (°C)", x="Incoming Flight (bees/minute)", y="Frames of Adult Bees")+ theme_light()+ theme(text = element_text(family = "Times", size = 20), axis.title.x = element_text(size=20, family="Times", face = "bold"),axis.title.y = element_text(size=20, family = "Times", face = "bold"), axis.text = element_text(size = 20, family = "Times"), legend.title=element_text(face= "bold", size = 16))+ geom_hline(yintercept=6, linetype='dotted', col = 'black') ##getting the lm equation lmb <- lm(Adult ~ Flight, data = bothyearflight) summary(lmb) ##dowloaded both csvs to combine them with a new column indicating year #we ended up removing it from the graph but this is how i made the current csv titled bothyearflight library("readr") write_csv(total, path = "total.csv") write_csv(total2020, path = "total2020.csv") bothyearflight$Year <- factor(bothyearflight$Year,levels=c(2019,2020), labels=c("2019","2020")) ########### Preliminary and final correlation chart for 2019 colony strength info # Merge two data frames by Colony and Season to match flight up with correct day/hive total <- merge(flight, bees,by=c("Colony","Bloom")) # Install PerformanceAnalytics: install.packages("PerformanceAnalytics") # Use chart.Correlation(): library("PerformanceAnalytics") #Column numbers represent Adult, Brood, Cluster, and Flight ##shows distribution and linearity #2019 data my_data <- total[, c(9,15,16,17)] chart.Correlation(my_data, histogram=TRUE, pch=19) #2020 data my_data2020 <- total2020[, c(10,14,15,16)] chart.Correlation(my_data2020, histogram=TRUE, pch=19) #yield 2019 data (retired) table(Yield$all) ls() my_alldata <- all[, c(5,6,7,8)] all$Yield <- as.numeric(as.character(all$Yield)) chart.Correlation(my_alldata, histogram=TRUE, pch=19) ###This is the more advanced corrgram script ###Final corrgram for 2019 strength data ## #added change to method, now kendall library(corrgram) library(ellipse) library(RColorBrewer) col_order <- c("Adult", "Brood", "Cluster", "Flight") my_data2 <- my_data[, col_order] corr2019 <- cor(my_data2, use = "complete.obs", method=c("kendall")) corrplot.mixed(corr = corr2019, upper = "ellipse", lower = "number", tl.col = "black", tl.cex = 1.5, cl.cex = 1.3, number.cex=1.5) ggpairs(my_data2) ##checking assumptions for kendall chart.Correlation(my_data2, histogram=TRUE, pch=19) ####other corrgram options, none used in final ## # R = cor(total[15,16,17,9]) round(R, 3) my_data <- total[, c(15,16,17,9)] res <- cor(my_data) round(res, 2) corrgram(my_data) plotcorr(res, col = colorRampPalette(c("#E08214", "white", "#8073AC"))(10), type = "lower") # Used to build a Pannel of 100 colors with Rcolor Brewer my_colors <- brewer.pal(9, "Blues") my_colors <- colorRampPalette(my_colors)(100) # Order the correlation matrix ord <- order(res[1, ]) data_ord <- res[ord, ord] plotcorr(res , col=my_colors[data_ord*50+50] , mar=c(1,1,1,1), type = "lower" ) install.packages("Hmisc") ##Use rcorr() function install.packages("Hmisc") library("Hmisc") res2 <- rcorr(as.matrix(my_data)) res2 corrplot(res2$r, type="upper", order="hclust", p.mat = res2$P, sig.level = 0.01, insig = "blank") ###################### ####Don't need this anymore ##Used this to check out cluster vs adult for 2020 data to find anomalies install.packages("ggrepel") require(ggrepel) p3 <- ggplot(bees2[which(bees2$Cluster>0),], aes(x=Adult, y=Cluster)) + geom_point() + geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE) p3 +geom_text(aes(label=Colony), size=7, colour="blue") p4<- ggplot(flightminusangelvalley, aes(x=Flight, y=Adult)) + geom_point() + geom_smooth(method=lm , color="red", fill="#69b3a2", se=TRUE) p4 +geom_text(aes(label=Colony), size=7, colour="blue") lmtemp <- lm(Adult ~ Flight, data = flightminusangelvalley) summary(lmtemp)