############ R script: BostonMortgageViaStan ########## # For fitting a generalized additive model to the # the Boston mortgage data via Stan. # Last changed: 08 MAY 2020 # Set tasks: doMCMC <- TRUE ; compileCode <- TRUE doSummMCMC <- TRUE doFitPlots <- TRUE # Load required packages: library(rstan) ; library(HRW) # Set MCMC parameters: nWarm <- 200 # Length of warmup. nKept <- 200 # Size of the kept sample. nThin <- 1 # Thinning factor. # Load in data: BostonMortgages <- read.table("BostonMortgages.txt",header=TRUE) # Set response and predictor variables: y <- as.numeric(as.character(BostonMortgages$deny)=="yes") x1 <- BostonMortgages$dir x2 <- BostonMortgages$lvr x3 <- as.numeric(as.character(BostonMortgages$black)=="yes") x4 <- as.numeric(as.character(BostonMortgages$pbcr)=="yes") x5 <- as.numeric(as.character(BostonMortgages$self)=="yes") x6 <- as.numeric(as.character(BostonMortgages$single)=="yes") x7 <- as.numeric(BostonMortgages$ccs==2) x8 <- as.numeric(BostonMortgages$ccs==3) x9 <- as.numeric(BostonMortgages$ccs==4) x10 <- as.numeric(BostonMortgages$ccs==5) x11 <- as.numeric(BostonMortgages$ccs==6) # Set up Z1 and Z2 matrices containing spline bases for x1 and x2: numIntKnots1 <- 15 a1 <- 1.01*min(x1) - 0.01*max(x1) b1 <- 1.01*max(x1) - 0.01*min(x1) intKnots1 <- quantile(unique(x1),seq(0,1,length = numIntKnots1+2) [-c(1,numIntKnots1+2)]) Z1 <- ZOSull(x1,intKnots = intKnots1,range.x = c(a1,b1)) ncZ1 <- ncol(Z1) numIntKnots2 <- 15 a2 <- 1.01*min(x2) - 0.01*max(x2) b2 <- 1.01*max(x2) - 0.01*min(x2) intKnots2 <- quantile(unique(x2),seq(0,1,length = numIntKnots2+2) [-c(1,numIntKnots2+2)]) Z2 <- ZOSull(x2,intKnots = intKnots2,range.x = c(a2,b2)) ncZ2 <- ncol(Z2) if (doMCMC) { # Specify logistic additive model in Stan: logistAddModel <- 'data { int n; int ncX; int ncZ1; int ncZ2; int y[n]; matrix[n,ncX] X; matrix[n,ncZ1] Z1; matrix[n,ncZ2] Z2; real sigmabeta; real ssigma; } parameters { vector[ncX] beta; vector[ncZ1] u1; real sigmau1; vector[ncZ2] u2; real sigmau2; } transformed parameters { vector[n] etaVec; etaVec = X*beta + Z1*u1 + Z2*u2; } model { for (i in 1:n) y[i] ~ bernoulli_logit(etaVec[i]); u1 ~ normal(0,sigmau1); u2 ~ normal(0,sigmau2); beta ~ normal(0,sigmabeta); sigmau1 ~ cauchy(0,ssigma); sigmau2 ~ cauchy(0,ssigma); }' # Store data in a list in format required by Stan: X <- cbind(1,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11) allData <- list(n=length(y),ncX=ncol(X),ncZ1=ncol(Z1),ncZ2=ncol(Z2), y=y,X=X,Z1=Z1,Z2=Z2,sigmabeta=1e5,ssigma=1e5) # Compile code for model if required: if (compileCode) stanCompilObj <- stan(model_code=logistAddModel,data=allData, iter=1,chains=1) # Obtain MCMC samples for each parameter using Stan: stanObj <- stan(model_code=logistAddModel,data=allData, warmup=nWarm,iter=(nWarm+nKept), chains=1,thin=nThin,refresh=25,fit=stanCompilObj) beta0MCMC <- as.vector(extract(stanObj,"beta[1]",permuted=FALSE)) beta1MCMC <- as.vector(extract(stanObj,"beta[2]",permuted=FALSE)) beta2MCMC <- as.vector(extract(stanObj,"beta[3]",permuted=FALSE)) beta3MCMC <- as.vector(extract(stanObj,"beta[4]",permuted=FALSE)) beta4MCMC <- as.vector(extract(stanObj,"beta[5]",permuted=FALSE)) beta5MCMC <- as.vector(extract(stanObj,"beta[6]",permuted=FALSE)) beta6MCMC <- as.vector(extract(stanObj,"beta[7]",permuted=FALSE)) beta7MCMC <- as.vector(extract(stanObj,"beta[8]",permuted=FALSE)) beta8MCMC <- as.vector(extract(stanObj,"beta[9]",permuted=FALSE)) beta9MCMC <- as.vector(extract(stanObj,"beta[10]",permuted=FALSE)) beta10MCMC <- as.vector(extract(stanObj,"beta[11]",permuted=FALSE)) beta11MCMC <- as.vector(extract(stanObj,"beta[12]",permuted=FALSE)) betaMCMC <- rbind(beta0MCMC,beta1MCMC,beta2MCMC,beta3MCMC,beta4MCMC, beta5MCMC,beta6MCMC,beta7MCMC,beta8MCMC,beta9MCMC, beta10MCMC,beta11MCMC) u1MCMC <- NULL for (k in 1:ncZ1) { charVar <- paste("u1[",as.character(k),"]",sep = "") u1MCMC <- rbind(u1MCMC,extract(stanObj,charVar,permuted = FALSE)) } u2MCMC <- NULL for (k in 1:ncZ2) { charVar <- paste("u2[",as.character(k),"]",sep = "") u2MCMC <- rbind(u2MCMC,extract(stanObj,charVar,permuted = FALSE)) } sigmau1MCMC <- as.vector(extract(stanObj,"sigmau1",permuted=FALSE)) sigmau2MCMC <- as.vector(extract(stanObj,"sigmau2",permuted=FALSE)) } if (doSummMCMC) { parNamesVal <- list(c("applicant","is black"),c("public bad","credit record"), c("self-","employed"),c("single"),c("credit risk","value of 2"), c("credit risk","value of 3"),c("credit risk","value of 4"), c("credit risk","value of 5"),c("credit risk","value of 6"), c(expression(sigma[u1])),c(expression(sigma[u2]))) MCMClist <- list(cbind(beta3MCMC,beta4MCMC,beta5MCMC,beta6MCMC,beta7MCMC, beta8MCMC,beta9MCMC,beta10MCMC,beta11MCMC,sigmau1MCMC, sigmau2MCMC)) summMCMC(MCMClist,parNames=parNamesVal) readline("Hit Enter to continue.\n") } if (doFitPlots) { par(mfrow=c(1,2)) ; ylimVal <- c(0,0.5) # Set function for obtaining the mode of a vector: modalValue <- function(x) return(unique(x)[which.max(tabulate(match(x,unique(x))))]) # Set grids for `dir' and `lvg': ng <- 101 ; dirg <- seq(0,1,length = ng) ; lvrg <- seq(0.01,1,length = ng) # Obtain and plot slice of the probability surface # in the `dir' direction corresponding to the modal # values of categorical predictors and the mean of # other continuous predictors: Xg <- cbind(1, dirg, mean(BostonMortgages$lvr), as.numeric(as.character(modalValue(BostonMortgages$black))=="yes"), as.numeric(as.character(modalValue(BostonMortgages$pbcr))=="yes"), as.numeric(as.character(modalValue(BostonMortgages$self))=="yes"), as.numeric(as.character(modalValue(BostonMortgages$single))=="yes"), as.numeric(modalValue(as.character(BostonMortgages$ccs)=="2")), as.numeric(modalValue(as.character(BostonMortgages$ccs)=="3")), as.numeric(modalValue(as.character(BostonMortgages$ccs)=="4")), as.numeric(modalValue(as.character(BostonMortgages$ccs)=="5")), as.numeric(modalValue(as.character(BostonMortgages$ccs)=="6"))) Z1g <- ZOSull(dirg,intKnots = intKnots1,range.x = c(a1,b1)) Z2g <- ZOSull(rep(mean(BostonMortgages$lvr),ng),intKnots = intKnots2,range.x = c(a2,b2)) etaHatMCMC <- Xg%*%betaMCMC + Z1g%*%u1MCMC + Z2g%*%u2MCMC probHatMCMC <- plogis(etaHatMCMC) probdirg <- apply(probHatMCMC,1,mean) credLowg <- apply(probHatMCMC,1,quantile,0.025) credUppg <- apply(probHatMCMC,1,quantile,0.975) plot(0,type="l",xlim=range(dirg),ylim=ylimVal,bty="l",xlab="debt payments to income ratio", ylab="probability of denial") polygon(c(dirg,rev(dirg)),c(credLowg,rev(credUppg)),col="palegreen",border=FALSE) lines(dirg,probdirg,col="darkgreen",lwd=2) abline(h=0,col="slateblue") rug(BostonMortgages$dir,col="dodgerblue") # Obtain and plot slice of the probability surface # in the `lvr' direction corresponding to the modal # values of categorical predictors and the mean of # other continuous predictors: Xg <- cbind(1, mean(BostonMortgages$dir), lvrg, as.numeric(as.character(modalValue(BostonMortgages$black))=="yes"), as.numeric(as.character(modalValue(BostonMortgages$pbcr))=="yes"), as.numeric(as.character(modalValue(BostonMortgages$self))=="yes"), as.numeric(as.character(modalValue(BostonMortgages$single))=="yes"), as.numeric(modalValue(as.character(BostonMortgages$ccs)=="2")), as.numeric(modalValue(as.character(BostonMortgages$ccs)=="3")), as.numeric(modalValue(as.character(BostonMortgages$ccs)=="4")), as.numeric(modalValue(as.character(BostonMortgages$ccs)=="5")), as.numeric(modalValue(as.character(BostonMortgages$ccs)=="6"))) Z1g <- ZOSull(rep(mean(BostonMortgages$dir),ng),intKnots = intKnots1,range.x = c(a1,b1)) Z2g <- ZOSull(lvrg,intKnots = intKnots2,range.x = c(a2,b2)) etaHatMCMC <- Xg%*%betaMCMC + Z1g%*%u1MCMC + Z2g%*%u2MCMC probHatMCMC <- plogis(etaHatMCMC) problvrg <- apply(probHatMCMC,1,mean) credLowg <- apply(probHatMCMC,1,quantile,0.025) credUppg <- apply(probHatMCMC,1,quantile,0.975) plot(0,type="l",xlim=range(lvrg),ylim=ylimVal,bty="l",xlab="loan size to property value ratio", ylab="probability of denial") polygon(c(lvrg,rev(lvrg)),c(credLowg,rev(credUppg)),col="palegreen",border=FALSE) lines(lvrg,problvrg,col="darkgreen",lwd=2) abline(h=0,col="slateblue") rug(BostonMortgages$lvr,col="dodgerblue") } ############ End of BostonMortgageViaStan ############