########## R script: lmean ########## # For illustrating mean and variance nonparametric # regression. A Bayesian approach is taken, with Stan # used for Markov chain Monte Carlo. # Last changed: 31 OCT 2023 # Set flags: compileCode <- TRUE # Load in required package: library(rstan) # Set the MCMC parameters: nBurnin <- 1000 # Length of burn-in. nIter <- 1000 # Size of the kept sample. nThin <- 1 # Thinning factor. # Read in LIDAR data from file: lidar <- read.table("lidar.txt",header=TRUE) xOrig <- lidar$range yOrig <- lidar$logratio # Standardise data for the Bayesian model fitting: aOrig <- 380 ; bOrig <- 730 mean.x <- mean(xOrig) ; sd.x <- sd(xOrig) mean.y <- mean(yOrig) ; sd.y <- sd(yOrig) x <- (xOrig - mean.x)/sd.x y <- (yOrig - mean.y)/sd.y # Obtain knots for x variable: a <- (aOrig - mean.x)/sd.x ; b <- (bOrig - mean.x)/sd.x numIntKnotsu <- 25 intKnotsu <- quantile(x,seq(0,1,length=numIntKnotsu+2)[-c(1,numIntKnotsu+2)]) Zu <- ZOSull(x,intKnots=intKnotsu,range.x=c(a,b)) ncZu <- ncol(Zu) numIntKnotsv <- 15 intKnotsv <- quantile(x,seq(0,1,length=numIntKnotsv+2)[-c(1,numIntKnotsv+2)]) Zv <- ZOSull(x,intKnots=intKnotsv,range.x=c(a,b)) ncZv <- ncol(Zv) # Specify model in Stan: meanAndVarNonparRegnModel <- 'data { int n; int ncZu; int ncZv; vector[n] y; matrix[n,2] X; matrix[n,ncZu] Zu; matrix[n,ncZv] Zv; } parameters { vector[2] beta; vector[2] gamma; vector[ncZu] u; vector[ncZv] v; real sigmau; real sigmav; } transformed parameters { vector[n] meanVec; vector[n] logVarVec; meanVec = X*beta + Zu*u ; logVarVec = X*gamma + Zv*v; } model { for (i in 1:n) { y[i] ~ normal(meanVec[i],exp(logVarVec[i]/2)); } u ~ normal(0,sigmau); v ~ normal(0,sigmav); beta ~ normal(0,100000) ; gamma ~ normal(0,10000); sigmau ~ cauchy(0,10000) ; sigmav ~ cauchy(0,10000); }' # Store data in a list in format required by STAN: X <- cbind(1,x) allData <- list(n=length(y),X=X,y=y,ncZu=ncZu,Zu=Zu,ncZv=ncZv,Zv=Zv) # Compile code for model if required: if (compileCode) stanCompilObj <- stan(model_code=meanAndVarNonparRegnModel,data=allData, iter=10,chains=1) # Perform MCMC: stanObj <- stan(model_code=meanAndVarNonparRegnModel,data=allData, warmup=nBurnin,iter=(nBurnin+nIter), chains=1,thin=nThin, fit=stanCompilObj) betaMCMC <- NULL for (j in 1:2) { charVar <- paste("beta[",as.character(j),"]",sep="") betaMCMC <- rbind(betaMCMC,extract(stanObj,charVar,permuted=FALSE)) } gammaMCMC <- NULL for (j in 1:2) { charVar <- paste("gamma[",as.character(j),"]",sep="") gammaMCMC <- rbind(gammaMCMC,extract(stanObj,charVar,permuted=FALSE)) } uMCMC <- NULL for (k in 1:ncZu) { charVar <- paste("u[",as.character(k),"]",sep="") uMCMC <- rbind(uMCMC,extract(stanObj,charVar,permuted=FALSE)) } vMCMC <- NULL for (k in 1:ncZv) { charVar <- paste("v[",as.character(k),"]",sep="") vMCMC <- rbind(vMCMC,extract(stanObj,charVar,permuted=FALSE)) } sigmauMCMC <- as.vector(extract(stanObj,"sigmau",permuted=FALSE)) sigmavMCMC <- as.vector(extract(stanObj,"sigmav",permuted=FALSE)) # Obtain function-wise MCMC matrix, including those # on the original scale of the data: ng <- 101 xgOrig <- seq(aOrig,bOrig,length=ng) xg <- (xgOrig - mean.x)/sd.x Xg <- cbind(rep(1,ng),xg) Zug <- ZOSull(xg,intKnots=intKnotsu,range.x=c(a,b)) Zvg <- ZOSull(xg,intKnots=intKnotsv,range.x=c(a,b)) fhatMCMC <- Xg%*%betaMCMC + Zug%*%uMCMC fhatMCMCOrig <- fhatMCMC*sd.y + mean.y fhatgOrig <- apply(fhatMCMCOrig,1,mean) credLower <- apply(fhatMCMCOrig,1,quantile,0.025) credUpper <- apply(fhatMCMCOrig,1,quantile,0.975) par(mfrow=c(1,1)) plot(xOrig,yOrig,xlab="range",xlim=c(aOrig,bOrig), ylab="log(ratio)",bty="l") polygon(c(xgOrig,rev(xgOrig)),c(credLower,rev(credUpper)), col="palegreen",border=FALSE) lines(xgOrig,fhatgOrig,col="darkgreen",lwd=2) points(xOrig,yOrig,col="dodgerblue",lwd=1) readline("Hit Enter to continue.\n") # Do some summaries and diagnostic checks of the MCMC # at the quantiles of the range variable: indQ1 <- length(xg[xg<=quantile(x,0.25)]) indQ2 <- length(xg[xg<=quantile(x,0.50)]) indQ3 <- length(xg[xg<=quantile(x,0.75)]) fhatQ1 <- fhatMCMCOrig[indQ1,] fhatQ2 <- fhatMCMCOrig[indQ2,] fhatQ3 <- fhatMCMCOrig[indQ3,] fhatQuartiles <- list(cbind(fhatQ1,fhatQ2,fhatQ3)) parNamesVal <- list(c("1st quartile","of range"), c("2nd quartile","of range"), c("3rd quartile","of range")) summMCMC(fhatQuartiles,parNames=parNamesVal) readline("Hit Enter to continue.\n") # Plot the standard deviation function estimate: par(mfrow=c(1,1)) logghatMCMC <- Xg%*%gammaMCMC + Zvg%*%vMCMC logghatMCMCOrig <- logghatMCMC + 2*log(sd.y) sdMCMCOrig <- exp(logghatMCMCOrig/2) sdhatgOrig <- apply(sdMCMCOrig,1,mean) credLower <- apply(sdMCMCOrig,1,quantile,0.025) credUpper <- apply(sdMCMCOrig,1,quantile,0.975) plot(0,0,type="n",xlim=range(xgOrig), ylim=range(c(credLower,credUpper)), bty="l",main="standard deviation function estimate", xlab="range",ylab="log(ratio)") polygon(c(xgOrig,rev(xgOrig)),c(credLower,rev(credUpper)), col="palegreen",border=FALSE) lines(xgOrig,sdhatgOrig,col="darkgreen",lwd=2) readline("Hit Enter to continue.\n") # Do some summaries and diagnostic checks of the MCMC # at the quantiles of the range variable: logghatQ1 <- logghatMCMCOrig[indQ1,] logghatQ2 <- logghatMCMCOrig[indQ2,] logghatQ3 <- logghatMCMCOrig[indQ3,] logghatQuartiles <- list(cbind(logghatQ1,logghatQ2,logghatQ3)) parNamesVal <- list(c("1st quartile","of range"), c("2nd quartile","of range"), c("3rd quartile","of range")) summMCMC(logghatQuartiles,parNames=parNamesVal) ########## End meanAndVarNonparRegn ##########