############################################## # # Changing tides gene expression # # By Tammy L. Wilson 30 Oct 2018 # New data received 9 Nov 2019 # Revised figure formatting and gene order 28 Feb 2019 # Annotated for PeerJ 1 Apr 2019 ## Addresses influential non-response (34) by removing 2 records. # ################################################ setwd("ENTER PATH HERE/20190401_CounihanETAL") library(lme4) library(nlme) library(emmeans) library(multcomp) library(TukeyC) library(lmerTest) library(pwr2) ######################################################################## ### Functions is.even <- function(x) x %% 2 == 0 is.odd <- function(x) x %% 2 != 0 panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...){ usr <- par("usr"); on.exit(par(usr)) par(usr = c(0, 1, 0, 1)) r <- round(cor(x, y,method="pearson"),digits=digits) p<- cor.test(x,y,method="pearson") t<- if(p$p.value<=0.05) 2 else 1 txt <- format(c(r, 0.123456789), digits = digits)[1] txt <- paste0(prefix, txt) if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt) text(0.5, 0.5, txt, font = t, col = t) } panel.cor.all <- function(x, y, digits = 2, prefix = "", cex.cor, ...){ usr <- par("usr"); on.exit(par(usr)) par(usr = c(0, 1, 0, 1)) r <- round(cor(x, y,method="pearson"),digits=digits) p<- cor.test(x,y,method="pearson") t<- if(p$p.value<0.05) 2 else 1 txt <- format(c(r, 0.123456789), digits = digits)[1] txt <- paste0(prefix, txt) if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt) text(0.5, 0.5, txt, font = t, col = t) } #### Function to analyze genes and save relevant output gene.analysis<-function(site.vec,gene,year,pSite,gene.name,family) { data<-data.frame(gene,year,pSite) ### Fit random effects model, REML did not return valid results G2<- lmer(gene ~ pSite + (1 | year), REML=FALSE, data = data) ## Post hoc tests. Have to use the Maximum Likelihood parameterization. ## Site means means.G2<-emmeans(G2, ~pSite,adjust="tukey" ) ## Posthoc tests for site differences posthoc.G2<- glht(G2, linfct = mcp(pSite = "Tukey")) mcs.G2 = summary(posthoc.G2,test=adjusted("single-step")) cld.G2<-cld(mcs.G2,level=0.05, decreasing=T) ## posthoc test for park difference K.G2<-matrix(c(0,1,1,0,-1,-1),1) t.G2<-glht(G2, linfct = K.G2) park.G2<-summary(t.G2) ## Return this ### Make data.frame with means, summary stats, tukey letters m.G2<-as.data.frame(summary(means.G2))[c('emmean','SE','lower.CL','upper.CL')] test.G2<-as.data.frame<-cld.G2[[10]]$Letters gene.name1<-rep(gene.name,length(site.vec)) m.test.G2<-data.frame(site.vec,gene.name1,m.G2,test.G2) ### Fit interaction of year and site to get means for correlation analysis G1<-glm(gene~ pSite*year, data = data ) means.G1<-emmeans(G1, ~pSite|year ) ## Return this. ### save output #### Site means write.table(m.test.G2,file = file.path('output',paste0('/',gene.name,'SiteMeans.csv')) ,append=FALSE, sep=",",row.names = FALSE) #### Site means by year #### This required processing outside of R to combine results. # I couldn't figure out how to format the output properly. # Table with combined results is included with the data. sink(file = file.path('output',paste0('/',gene.name,'SiteMeansbyYear.txt')),append=FALSE) print(means.G1) sink() } #end gene.analysis function #### Function to analyze log-normal physiological markers and save relevant output physio.analysis<-function(site.vec,physio,year,pSite,physio.name) { data<-data.frame(physio,year,pSite) ### Fit random effects model, REML did not return valid results G2<- lmer(log(physio) ~ pSite + (1 | year), REML=FALSE, data = data) ## Post hoc tests. Have to use the Maximum Likelihood parameterization. ## Site means means.G2<-emmeans(G2, ~pSite,adjust="tukey",type="response") ## Posthoc tests for site differences posthoc.G2<- glht(G2, linfct = mcp(pSite = "Tukey")) mcs.G2 = summary(posthoc.G2,test=adjusted("single-step")) cld.G2<-cld(mcs.G2,level=0.05, decreasing=T) ## posthoc test for park difference K.G2<-matrix(c(0,1,1,0,-1,-1),1) t.G2<-glht(G2, linfct = K.G2) park.G2<-summary(t.G2) ## Return this ### Make data.frame with means, summary stats, tukey letters m.G2<-as.data.frame(summary(means.G2))[c('response','SE','lower.CL','upper.CL')] test.G2<-as.data.frame<-cld.G2[[10]]$Letters physio.name1<-rep(physio.name,length(site.vec)) m.test.G2<-data.frame(site.vec,physio.name1,m.G2,test.G2) ### Fit interaction of year and site to get means for correlation analysis G1<-glm(log(physio)~ pSite*year, data = data) means.G1<-emmeans(G1, ~pSite|year,type="response") ## Return this. ### save output #### Site means write.table(m.test.G2,file = file.path('output',paste0('/',physio.name,'SiteMeans.csv')) ,append=FALSE, sep=",",row.names = FALSE) #### Site means by year #### This required processing outside of R to combine results. # I couldn't figure out how to format the output properly. # Table with combined results is included with the data. sink(file = file.path('output',paste0('/',physio.name,'SiteMeansbyYear.txt')),append=FALSE) print(means.G1) sink() } #end physio.analysis function ### End Functions ############################################################################################### geneData<-read.csv("CounihanETAL2019_geneData.csv", header=T) physioData<-read.csv("CounihanETAL2019_physioData.csv", header=T) gene.vec<-c("CaM","Casp8","MIF","CNN","CHI","CCOIV","HSP70","HSP90","HIFa" ,"MytB","Myt","MT20","Cyp3","P53") physio.vec<-c("Condition.Factor","Shell.Thickness","Hemocyte.Count","H2O2","RNA:DNA","P450","HSP40") site.vec<-c("Kaflia", "Kukak", "Takli", "Chinitna", "Fossil","Silver") ################# ### Gene analysis gene.analysis(site.vec=site.vec,gene=geneData$CNN,year=as.factor(geneData$year),pSite=geneData$pSite,gene.name="CNN") gene.analysis(site.vec=site.vec,gene=geneData$CaM,year=as.factor(geneData$year),pSite=geneData$pSite,gene.name="CaM") gene.analysis(site.vec=site.vec,gene=geneData$Casp8,year=as.factor(geneData$year),pSite=geneData$pSite,gene.name="Casp8") gene.analysis(site.vec=site.vec,gene=geneData$CCOIV,year=as.factor(geneData$year),pSite=geneData$pSite,gene.name="CCOIV") gene.analysis(site.vec=site.vec,gene=geneData$CHI,year=as.factor(geneData$year),pSite=geneData$pSite,gene.name="CHI") gene.analysis(site.vec=site.vec,gene=geneData$Cyp3,year=as.factor(geneData$year),pSite=geneData$pSite,gene.name="Cyp3") gene.analysis(site.vec=site.vec,gene=geneData$HIFa,year=as.factor(geneData$year),pSite=geneData$pSite,gene.name="HiFa") gene.analysis(site.vec=site.vec,gene=geneData$HSP70,year=as.factor(geneData$year),pSite=geneData$pSite,gene.name="HSP70") gene.analysis(site.vec=site.vec,gene=geneData$HSP90,year=as.factor(geneData$year),pSite=geneData$pSite,gene.name="HSP90") gene.analysis(site.vec=site.vec,gene=geneData$MIF,year=as.factor(geneData$year),pSite=geneData$pSite,gene.name="MIF") gene.analysis(site.vec=site.vec,gene=geneData$MT20,year=as.factor(geneData$year),pSite=geneData$pSite,gene.name="MT20") gene.analysis(site.vec=site.vec,gene=geneData$MytB,year=as.factor(geneData$year),pSite=geneData$pSite,gene.name="MytB") gene.analysis(site.vec=site.vec,gene=geneData$Myt,year=as.factor(geneData$year),pSite=geneData$pSite,gene.name="Myt") gene.analysis(site.vec=site.vec,gene=geneData$P53,year=as.factor(geneData$year),pSite=geneData$pSite,gene.name="P53") ### Physiological Biomarkers gene.analysis(site.vec=site.vec,gene=physioData$cond.factor,year=as.factor(physioData$year),pSite=physioData$pSite,gene.name="Condition.Factor") gene.analysis(site.vec=site.vec,gene=physioData$shell.thick,year=as.factor(physioData$year),pSite=physioData$pSite,gene.name="Shell.Thickness") physio.analysis(site.vec=site.vec,physio=physioData$hemo.count,year=as.factor(physioData$year),pSite=physioData$pSite,physio.name="Hemocyte.Count") gene.analysis(site.vec=site.vec,gene=physioData$H2O2,year=as.factor(physioData$year),pSite=physioData$pSite,gene.name="H2O2") physio.analysis(site.vec=site.vec,physio=physioData$RNA.DNA,year=as.factor(physioData$year),pSite=physioData$pSite,physio.name="RNADNA") physio.analysis(site.vec=site.vec,physio=physioData$P450,year=as.factor(physioData$year),pSite=physioData$pSite,physio.name="P450") physio.analysis(site.vec=site.vec,physio=physioData$HSP40,year=as.factor(physioData$year),pSite=physioData$pSite,physio.name="HSP40") ### ################## ### Call output for means and Tukey letters # Gene CNNMeans<-read.table("output/CNNSiteMeans.csv", header = T, sep = ",") CaMMeans<-read.table("output/CaMSiteMeans.csv", header = T, sep = ",") Casp8Means<-read.table("output/Casp8SiteMeans.csv", header = T, sep = ",") CCOIVMeans<-read.table("output/CCOIVSiteMeans.csv", header = T, sep = ",") CHIMeans<-read.table("output/CHISiteMeans.csv", header = T, sep = ",") Cyp3Means<-read.table("output/Cyp3SiteMeans.csv", header = T, sep = ",") HiFaMeans<-read.table("output/HiFaSiteMeans.csv", header = T, sep = ",") HSP70Means<-read.table("output/HSP70SiteMeans.csv", header = T, sep = ",") HSP90Means<-read.table("output/HSP90SiteMeans.csv", header = T, sep = ",") MIFMeans<-read.table("output/MIFSiteMeans.csv", header = T, sep = ",") MT20Means<-read.table("output/MT20SiteMeans.csv", header = T, sep = ",") MytBMeans<-read.table("output/MytBSiteMeans.csv", header = T, sep = ",") MytMeans<-read.table("output/MytSiteMeans.csv", header = T, sep = ",") p53Means<-read.table("output/p53SiteMeans.csv", header = T, sep = ",") # Physio cond.factorMeans<-read.table("output/Condition.FactorSiteMeans.csv", header = T, sep = ",") shell.thickMeans<-read.table("output/Shell.ThicknessSiteMeans.csv", header = T, sep = ",") hemo.countMeans<-read.table("output/Hemocyte.CountSiteMeans.csv", header = T, sep = ",") H2O2Means<-read.table("output/H2O2SiteMeans.csv", header = T, sep = ",") RNA.DNAMeans<-read.table("output/RNADNASiteMeans.csv", header = T, sep = ",") P450Means<-read.table("output/P450SiteMeans.csv", header = T, sep = ",") HSP40Means<-read.table("output/HSP40SiteMeans.csv", header = T, sep = ",") ### Combine output for figure # gene means.all.gene<-data.frame(CaMMeans[,3],Casp8Means[,3],MIFMeans[,3],CNNMeans[,3],CHIMeans[,3] ,CCOIVMeans[,3],HSP70Means[,3],HSP90Means[,3],HiFaMeans[,3],MytBMeans[,3] ,MytMeans[,3],MT20Means[,3],Cyp3Means[,3],p53Means[,3]) lcl.all.gene<-data.frame(CaMMeans[,5],Casp8Means[,5],MIFMeans[,5],CNNMeans[,5],CHIMeans[,5] ,CCOIVMeans[,5],HSP70Means[,5],HSP90Means[,5],HiFaMeans[,5],MytBMeans[,5] ,MytMeans[,5],MT20Means[,5],Cyp3Means[,5],p53Means[,5]) ucl.all.gene<-data.frame(CaMMeans[,6],Casp8Means[,6],MIFMeans[,6],CNNMeans[,6],CHIMeans[,6] ,CCOIVMeans[,6],HSP70Means[,6],HSP90Means[,6],HiFaMeans[,6],MytBMeans[,6] ,MytMeans[,6],MT20Means[,6],Cyp3Means[,6],p53Means[,6]) Tltr.all.gene<-data.frame(CaMMeans[,7],Casp8Means[,7],MIFMeans[,7],CNNMeans[,7],CHIMeans[,7] ,CCOIVMeans[,7],HSP70Means[,7],HSP90Means[,7],HiFaMeans[,7],MytBMeans[,7] ,MytMeans[,7],MT20Means[,7],Cyp3Means[,7],p53Means[,7]) # Physio means.all.physio<-data.frame(cbind(cond.factorMeans[,3],shell.thickMeans[,3],hemo.countMeans[,3],H2O2Means[,3] ,RNA.DNAMeans[,3],P450Means[,3] ,HSP40Means[,3])) lcl.all.physio<-data.frame(cbind(cond.factorMeans[,5],shell.thickMeans[,5],hemo.countMeans[,5],H2O2Means[,5] ,RNA.DNAMeans[,5],P450Means[,5] ,HSP40Means[,5])) ucl.all.physio<-data.frame(cbind(cond.factorMeans[,6],shell.thickMeans[,6],hemo.countMeans[,6],H2O2Means[,6] ,RNA.DNAMeans[,6],P450Means[,6] ,HSP40Means[,6])) Tltr.all.physio<-data.frame(cond.factorMeans[,7],shell.thickMeans[,7],hemo.countMeans[,7],H2O2Means[,7],RNA.DNAMeans[,7],P450Means[,7] ,HSP40Means[,7]) ### Plots #### Gene boxplot par(mfrow=c(14,1), mai = c(0,.5,0,0), omi = c(.25,1,1,1)) for(i in 1:14){ max.y<-max(geneData[,i],na.rm = TRUE)+4 min.y<-min(geneData[,i],na.rm=TRUE)-1 bp<-boxplot (geneData[,i] ~ geneData[,16] ,xaxt = "n", yaxt="n",ylim=c(min.y,max.y),medlwd=1) ifelse(is.even(i)==TRUE,axis(4),axis(2)) if(is.even(i)==TRUE){mtext(text=gene.vec2[i],side=4,cex=.75,line=2)} if(is.even(i)==FALSE){mtext(text=gene.vec2[i],side=2,cex=.75,line=2)} loc.x<-seq(1:6) loc.y<-rep(30,6) points(loc.x,means.all.gene[,i],col="red",pch=18, cex=1.5) for(j in 1:6){ arrows(loc.x[j],lcl.all.gene[j,i],loc.x[j],ucl.all.gene[j,i],code=3,col="red",angle=75,length=.1) text(x = loc.x[j],y = max.y-2, Tltr.all.gene[j,i]) } } mtext(text="Gene Expression", side=2, line = .5, outer=TRUE) mtext(text="Kaflia",line = .5, at = .215, side=3,outer=T,cex=.75) mtext(text="Kukak", line = .5, at = .35, side=3,outer=T,cex=.75) mtext(text="Takli", line = .5, at = .48, side = 3, outer = T, cex = .75) mtext(text="Chinitna", line = .5, at = .625, side = 3, outer = T, cex = .75) mtext(text="Fossil", line = .5, at = .765, side = 3, outer = T, cex = .75) mtext(text="Silver", line = .5, at = .90, side = 3, outer = T, cex = .75) mtext(text="Katmai", line = 1.5, at = .35, side = 3, outer = T, cex = 1) mtext (text= "Lake Clark", line = 1.5, at = .76, side = 3, outer = T, cex = 1) ### Physio Boxplot par(mfrow=c(7,1), mai = c(0,.5,0,0), omi = c(.25,1,1,1)) for(i in 1:7){ max.y<-max(physioData[,i],na.rm = TRUE)*1.3 min.y<-min(physioData[,i],na.rm=TRUE)*(-.85) bp<-boxplot (physioData[,i] ~ physioData[,9] #* physioData[,17], col=c("white","dark gray"),ylab = physio.vec[i] ,xaxt = "n", yaxt="n",ylim=c(min.y,max.y),medlwd=1) ifelse(is.even(i)==TRUE,axis(4),axis(2)) if(is.even(i)==TRUE){mtext(text=physio.vec[i],side=4,cex=.75,line=2)} if(is.even(i)==FALSE){mtext(text=physio.vec[i],side=2,cex=.75,line=2)} loc.x<-seq(1:6) loc.y<-rep(15,6) points(loc.x,means.all.physio[,i],col="red",pch=18, cex=1.5) for(j in 1:6){ arrows(loc.x[j],lcl.all.physio[j,i],loc.x[j],ucl.all.physio[j,i],code=3,col="red",angle=75,length=.1) text(x = loc.x[j],y = max(physioData[,i],na.rm="TRUE")*1.2, Tltr.all.physio[j,i]) } } mtext(text="Physiological Marker Value", side=2, line = .5, outer=TRUE) mtext(text="Kaflia",line = .5, at = .215, side=3,outer=T,cex=.75) mtext(text="Kukak", line = .5, at = .35, side=3,outer=T,cex=.75) mtext(text="Takli", line = .5, at = .48, side = 3, outer = T, cex = .75) mtext(text="Chinitna", line = .5, at = .625, side = 3, outer = T, cex = .75) mtext(text="Fossil", line = .5, at = .765, side = 3, outer = T, cex = .75) mtext(text="Silver", line = .5, at = .90, side = 3, outer = T, cex = .75) mtext(text="Katmai", line = 1.5, at = .35, side = 3, outer = T, cex = 1) mtext (text= "Lake Clark", line = 1.5, at = .76, side = 3, outer = T, cex = 1) ### Correlation analysis between genes GXY<-read.table("output/GenesSiteXYear.csv", header=T, sep = ",") ## All correlations from means pairs(na.omit(GXY[,3:23]),lower.panel=panel.smooth,pch=1, upper.panel=panel.cor) ### Using data points rather than means for gene and physio correlations ## Adjust the p value to 0.001 and use Pearson correlations pairs(na.omit(geneData[,1:14]),lower.panel=panel.smooth,pch=1, upper.panel=panel.cor.all) pairs(na.omit(physioData[,1:7]),lower.panel=panel.smooth,pch=1, upper.panel=panel.cor.all)