########## R script: ozoneCalifViaStan.Rs ########## # For performing MCMC-based fitting of a # generalised additive model for the # Californian ozone data using Stan. # Last changed: 15 MAY 2020 # Set tasks: doMCMC <- TRUE ; compileCode <- TRUE doSummMCMC <- TRUE doFitPlots <- TRUE # Load required packages: library(rstan) ; library(HRW) # Set MCMC parameters: nWarm <- 1000 # Length of warmup. nKept <- 1000 # Size of the kept sample. nThin <- 1 # Thinning factor. # Read in and extract data: ozoneCalif <- read.table("ozoneCalif.txt",header=T) yOrig <- ozoneCalif$ozone x1Orig <- ozoneCalif$inversion.base.height x2Orig <- ozoneCalif$daggett.pressure.gradient x3Orig <- ozoneCalif$inversion.base.temp # Standardised data for MCMC: mean.y <- mean(yOrig) ; sd.y <- sd(yOrig) mean.x1 <- mean(x1Orig) ; sd.x1 <- sd(x1Orig) mean.x2 <- mean(x2Orig) ; sd.x2 <- sd(x2Orig) mean.x3 <- mean(x3Orig) ; sd.x3 <- sd(x3Orig) y <- (yOrig - mean.y)/sd.y x1 <- (x1Orig - mean.x1)/sd.x1 x2 <- (x2Orig - mean.x2)/sd.x2 x3 <- (x3Orig - mean.x3)/sd.x3 # Set up matrices for additive model: numIntKnots <- 15 intKnots1 <- quantile(unique(x1),seq(0,1,length= (numIntKnots+2))[-c(1,(numIntKnots+2))]) range.x1 <- c(min(x1),max(x1)) Z1 <- ZOSull(x1,range.x1,intKnots1) ncZ1 <- ncol(Z1) intKnots2 <- quantile(unique(x2),seq(0,1,length= (numIntKnots+2))[-c(1,(numIntKnots+2))]) range.x2 <- c(min(x2),max(x2)) Z2 <- ZOSull(x2,range.x2,intKnots2) ncZ2 <- ncol(Z2) intKnots3 <- quantile(unique(x3),seq(0,1,length= (numIntKnots+2))[-c(1,(numIntKnots+2))]) range.x3 <- c(min(x3),max(x3)) Z3 <- ZOSull(x3,range.x3,intKnots3) ncZ3 <- ncol(Z3) if (doMCMC) { # Specify model in Stan: GaussAddModel <- 'data { int n; int ncX; int ncZ1; int ncZ2; int ncZ3; vector[n] y; matrix[n,ncX] X; matrix[n,ncZ1] Z1; matrix[n,ncZ2] Z2; matrix[n,ncZ3] Z3; real sigmabeta; real ssigma; } parameters { vector[ncX] beta; real sigmaeps; vector[ncZ1] u1; real sigmau1; vector[ncZ2] u2; real sigmau2; vector[ncZ3] u3; real sigmau3; } model { y ~ normal(X*beta + Z1*u1 + Z2*u2 + Z3*u3,sigmaeps); u1 ~ normal(0,sigmau1); u2 ~ normal(0,sigmau2); u3 ~ normal(0,sigmau3); beta ~ normal(0,sigmabeta); sigmau1 ~ cauchy(0,ssigma); sigmau2 ~ cauchy(0,ssigma); sigmau3 ~ cauchy(0,ssigma); sigmaeps ~ cauchy(0,ssigma); }' # Store data in a list in format required by Stan: X <- cbind(1,x1,x2,x3) allData <- list(n=length(y),ncX=ncol(X),ncZ1=ncol(Z1),ncZ2=ncol(Z2), ncZ3=ncol(Z3),y=y,X=X,Z1=Z1,Z2=Z2,Z3=Z3, sigmabeta=1e5,ssigma=1e5) # Compile code for model if required: if (compileCode) stanCompilObj <- stan(model_code=GaussAddModel,data=allData, iter=1,chains=1) # Obtain MCMC samples for each parameter using Stan: stanObj <- stan(model_code=GaussAddModel,data=allData, warmup=nWarm,iter=(nWarm+nKept), chains=1,thin=nThin,refresh=250,fit=stanCompilObj) betaMCMC <- as.vector(extract(stanObj,"beta[1]",permuted=FALSE)) betaMCMC <- rbind(betaMCMC,as.vector(extract(stanObj,"beta[2]",permuted=FALSE))) betaMCMC <- rbind(betaMCMC,as.vector(extract(stanObj,"beta[3]",permuted=FALSE))) betaMCMC <- rbind(betaMCMC,as.vector(extract(stanObj,"beta[4]",permuted=FALSE))) sigmaepsMCMC <- as.vector(extract(stanObj,"sigmaeps",permuted=FALSE)) sigmau1MCMC <- as.vector(extract(stanObj,"sigmau1",permuted=FALSE)) sigmau2MCMC <- as.vector(extract(stanObj,"sigmau2",permuted=FALSE)) sigmau3MCMC <- as.vector(extract(stanObj,"sigmau3",permuted=FALSE)) u1MCMC <- NULL for (k in 1:ncZ1) { charVar <- paste("u1[",as.character(k),"]",sep = "") u1MCMC <- rbind(u1MCMC,extract(stanObj,charVar,permuted = FALSE)) } u2MCMC <- NULL for (k in 1:ncZ2) { charVar <- paste("u2[",as.character(k),"]",sep = "") u2MCMC <- rbind(u2MCMC,extract(stanObj,charVar,permuted = FALSE)) } u3MCMC <- NULL for (k in 1:ncZ3) { charVar <- paste("u3[",as.character(k),"]",sep = "") u3MCMC <- rbind(u3MCMC,extract(stanObj,charVar,permuted = FALSE)) } uMCMC <- rbind(u1MCMC,u2MCMC,u3MCMC) } if (doSummMCMC) { Z <- cbind(Z1,Z2,Z3) CTC <- crossprod(cbind(X,cbind(Z1,Z2,Z3))) Dmat <- diag(c(rep(0,4),rep(1,ncol(Z)))) lambda1MCMC <- (sigmaepsMCMC/sigmau1MCMC)^2 lambda2MCMC <- (sigmaepsMCMC/sigmau2MCMC)^2 lambda3MCMC <- (sigmaepsMCMC/sigmau3MCMC)^2 inds1 <- c(2,5:(4+ncZ1)) inds2 <- c(3,(5+ncZ1):(4+ncZ1+ncZ2)) inds3 <- c(4,(5+ncZ1+ncZ2):(4+ncZ1+ncZ2+ncZ3)) EDF1MCMC <- rep(NA,length(lambda1MCMC)) EDF2MCMC <- rep(NA,length(lambda2MCMC)) EDF3MCMC <- rep(NA,length(lambda3MCMC)) for (iMCMC in 1:length(lambda1MCMC)) { dVecCurr <- c(rep(0,4),rep(lambda1MCMC[iMCMC],ncZ1), rep(lambda2MCMC[iMCMC],ncZ2), rep(lambda3MCMC[iMCMC],ncZ3)) HatMatrix <- solve(CTC+diag(dVecCurr),CTC,tol=1e-20) EDF1MCMC[iMCMC] <- sum(diag(HatMatrix[inds1,inds1])) EDF2MCMC[iMCMC] <- sum(diag(HatMatrix[inds2,inds2])) EDF3MCMC[iMCMC] <- sum(diag(HatMatrix[inds3,inds3])) } parNamesVal <- list(c(expression(EDF[1])),c(expression(EDF[2])), c(expression(EDF[3]))) MCMClist <- list(cbind(EDF1MCMC,EDF2MCMC,EDF3MCMC)) summMCMC(MCMClist,parNames=parNamesVal) readline("Hit Enter to continue.\n") } if (doFitPlots) { # Draw plots par(mfrow=c(1,3)) ng <- 101 ylimVal <- c(0,30) cex.labVal <- 1.5 # Plot first function fit (with other variables # set at their averages): x1g <- seq(min(x1),max(x1),length=ng) x2g <- rep(mean(x2),length=ng) x3g <- rep(mean(x3),length=ng) Z1g <- ZOSull(x1g,range.x1,intKnots1) Z2g <- ZOSull(x2g,range.x2,intKnots2) Z3g <- ZOSull(x3g,range.x3,intKnots3) Xg <- cbind(rep(1,ng),x1g,x2g,x3g) Zg <- cbind(Z1g,Z2g,Z3g) f1MCMC <- Xg%*%betaMCMC + Zg%*%uMCMC f1MCMCOrig <- sd.y*f1MCMC + mean.y credLower <- apply(f1MCMCOrig,1,quantile,0.025) credUpper <- apply(f1MCMCOrig,1,quantile,0.975) f1gOrig <- apply(f1MCMCOrig,1,mean) x1gOrig <- sd.x1*x1g + mean.x1 plot(x1gOrig,f1gOrig,type="n",bty="l",xlab="inversion base height", ylab="ozone",ylim=ylimVal,cex.lab=cex.labVal) polygon(c(x1gOrig,rev(x1gOrig)),c(credLower,rev(credUpper)),col="palegreen",border=FALSE) lines(x1gOrig,f1gOrig,col="darkgreen",lwd=2) rug(x1Orig,col="dodgerblue") # Plot second function fit (with other variables # set at their averages): x1g <- rep(mean(x1),length=ng) x2g <- seq(min(x2),max(x2),length=ng) x3g <- rep(mean(x3),length=ng) Z1g <- ZOSull(x1g,range.x1,intKnots1) Z2g <- ZOSull(x2g,range.x2,intKnots2) Z3g <- ZOSull(x3g,range.x3,intKnots3) Xg <- cbind(rep(1,ng),x1g,x2g,x3g) Zg <- cbind(Z1g,Z2g,Z3g) f2MCMC <- Xg%*%betaMCMC + Zg%*%uMCMC f2MCMCOrig <- sd.y*f2MCMC + mean.y credLower <- apply(f2MCMCOrig,1,quantile,0.025) credUpper <- apply(f2MCMCOrig,1,quantile,0.975) f2gOrig <- apply(f2MCMCOrig,1,mean) x2gOrig <- sd.x2*x2g + mean.x2 plot(x2gOrig,f2gOrig,type="n",bty="l",xlab="Daggett pressure gradient", ylab="ozone",ylim=ylimVal,cex.lab=cex.labVal) polygon(c(x2gOrig,rev(x2gOrig)),c(credLower,rev(credUpper)),col="palegreen",border=FALSE) lines(x2gOrig,f2gOrig,col="darkgreen",lwd=2) rug(x2Orig,col="dodgerblue") # Plot third function fit (with other variables # set at their averages): x1g <- rep(mean(x1),length=ng) x2g <- rep(mean(x2),length=ng) x3g <- seq(min(x3),max(x3),length=ng) Z1g <- ZOSull(x1g,range.x1,intKnots1) Z2g <- ZOSull(x2g,range.x2,intKnots2) Z3g <- ZOSull(x3g,range.x3,intKnots3) Xg <- cbind(rep(1,ng),x1g,x2g,x3g) Zg <- cbind(Z1g,Z2g,Z3g) f3MCMC <- Xg%*%betaMCMC + Zg%*%uMCMC f3MCMCOrig <- sd.y*f3MCMC + mean.y credLower <- apply(f3MCMCOrig,1,quantile,0.025) credUpper <- apply(f3MCMCOrig,1,quantile,0.975) f3gOrig <- apply(f3MCMCOrig,1,mean) x3gOrig <- sd.x3*x3g + mean.x3 plot(x3gOrig,f3gOrig,type="n",bty="l",xlab="inversion base temperature", ylab="ozone",ylim=ylimVal,cex.lab=cex.labVal) polygon(c(x3gOrig,rev(x3gOrig)),c(credLower,rev(credUpper)), col="palegreen",border=FALSE) lines(x3gOrig,f3gOrig,col="darkgreen",lwd=2) rug(x3Orig,col="dodgerblue") } ########## End of ozoneCalifViaStan ##########