## code to accompany the NELS math example from ## lecture 13-14 of PSTAT 215A, Bayesian Inference, UCSB ## ## Robert B. Gramacy, adapted from Peter Hoff ## load necessary libraries library(mvtnorm) library(coda) ## read in the data file math <- read.table("nels_math.txt", header=TRUE) ## allocate space to hold the data and summary ## statistics in a more useful structure Y <- list() YM <- NULL schools <- unique(math$school) m <- length(unique(schools)) n <- ybar <- ymed <- s2 <- rep(NA, m) ## populate the structure with the data from the file for(j in 1:m) { Y[[j]] <- math$score[ math$school == schools[j]] ybar[j] <- mean(Y[[j]]) ymed[j] <- median(Y[[j]]) n[j] <- length(Y[[j]]) s2[j] <- var(Y[[j]]) } N <- sum(n) ## set up a plot for the data plot(c(1,m), range(Y) ,type="n", ylab="math score", bty="n", xlab="rank of school-specific math score average") ## plot the data in ordered by averages o <- order(ybar) for(l in 1:length(o)) {## but it is easier to do this with commands in R, e.g., j <- o[l] points(rep(l,n[j]), Y[[j]], pch=16, cex=.6) segments(l, min(Y[[j]]), l, max(Y[[j]])) } abline(h=50, col=2, lty=2, lwd=2) ## plot a histogram of the range of the sample mean par(mfrow=c(1,2)) hist(ybar, bty="n", main="") plot(n, ybar, main="", bty="n") range(ybar) ## ## time for the Bayesian analysis ## ## weakly informative priors nu.0 <- 1; sigma2.0 <- 100 eta.0 <- 1; tau2.0 <- 100 psi.0 <- 20; g2.0 <- 25 ## allocate space for the samples S <- 5000 mu <- matrix(NA, nrow=S, ncol=m) sigma2 <- psi <- tau2 <- rep(NA, S) ## initial values of parameters mu[1,] <- ybar sigma2[1] <- mean(s2) psi[1] <- mean(mu[1,]) tau2[1] <- var(mu[1,]) ## Gibbs sampling loop for(s in 2:S) { ## sample new values of the mus for(j in 1:m) { V.mu <- 1/(n[j]/sigma2[s-1] + 1/tau2[s-1]) m.mu <- V.mu * (ybar[j]*n[j]/sigma2[s-1] + psi[s-1]/tau2[s-1]) mu[s,j] <- rnorm(1, m.mu, sqrt(V.mu)) } ## sample a new value of sigma2 nu.n <- nu.0 + N sigma2.s <- nu.0*sigma2.0 for(j in 1:m) sigma2.s <- sigma2.s + sum((Y[[j]] - mu[s,j])^2) sigma2[s] <- 1/rgamma(1, nu.n/2, sigma2.s/2) ## sample a new value of psi V.psi <- 1/(m/tau2[s-1] + 1/g2.0) m.psi <- V.psi * (m*mean(mu[s,])/tau2[s-1] + psi.0/g2.0) psi[s] <- rnorm(1, m.psi, sqrt(V.psi)) ## sample a new value of tau2 eta.m <- eta.0 + m tau2.s <- eta.0*tau2.0 + sum((mu[s,] - psi[s])^2) tau2[s] <- 1/rgamma(1, eta.m/2, tau2.s/2) } ## ## checking for mixing and convergence is ad hoc ## ## calculate ESSs for parameters mu.ess <- apply(mu, 2, effectiveSize) mu.ess ## check the mixing for one of the parameters via traces par(mfrow=c(1,1)) plot(mu[,15], type="l") ## calculate ESSs for hierarchical params effectiveSize(sigma2) effectiveSize(psi) effectiveSize(tau2) ## check the mixing via traces par(mfrow=c(3,1)) plot(sigma2, type="l") plot(psi, type="l") plot(tau2, type="l") ## hierarchical moments mean(psi) mean(sqrt(sigma2)) mean(sqrt(tau2)) ## posterior summaries of hierarchical parameters par(mfrow=c(1,3)) ## psi plot(density(psi), main="", xlab="psi", bty="n", lwd=2) abline(v=quantile(psi, c(0.025, 0.5, 0.975)), lty=c(2,3,2), lwd=2, col=2) ## sigma2 plot(density(sigma2), main="", xlab="sigma2", bty="n", lwd=2) abline(v=quantile(sigma2, c(0.025, 0.5, 0.975)), lty=c(2,3,2), lwd=2, col=2) ## tau2 plot(density(tau2), main="", xlab="tau2", bty="n", lwd=2) abline(v=quantile(tau2, c(0.025, 0.5, 0.975)), lty=c(2,3,2), lwd=2, col=2) ## assessing shrinkage ## plotting the data average versus the group posterior mean plot(ybar, apply(mu, 2, mean), main="", bty="n") abline(0,1) ## ploting the group size versus the group posterior mean plot(n[o], (ybar-apply(mu, 2, mean))[o], main="", bty="n") abline(h=0) ## ranking schools plot(density(mu[,46]), type="l", lwd=2, main="", bty="n", xlim=range(c(Y[[46]], Y[[82]])), ylim=c(-0.02, 0.22)) abline(h=0, col="gray") lines(density(mu[,82]), type="l", lwd=2, lty=2, col=2) abline(v=mean(psi), col=3, lty=3, lwd=3) abline(h=-0.01) points(x=Y[[46]], y=rep(-0.01, n[46])) points(x=ybar[46], y=-0.01, pch=19) abline(h=-0.02) points(x=Y[[82]], y=rep(-0.02, n[82]), col=2) points(x=ybar[82], y=-0.02, pch=19, col=2) legend("right", c("school 46", "school 82", "psi-bar"), lwd=2, lty=1:3, col=1:3, bty="n") ## ## Begin group-specific variances ## ## nu0.fullcond: ## ## evaluating the (logarithm of the full conditional ## distribution of nu.0 given sigma1^2, ..., sigmam^2 ## and sigma0^2 nu.0.lfullcond <- function(x, sigma2s, sigma2.0, alpha) { m <- length(sigma2s) term1 <- m*(0.5*x*log(sigma2.0*x/2) - lgamma(x/2)) term2 <- (1-x/2)*sum(log(sigma2s)) term3 <- -x*(alpha + 0.5*sigma2.0*sum(1/sigma2s)) return(term1 + term2 + term3) } ## nu.0.draw: ## ## draw from the full posterior distribution of nu.0, ## restricted to the range 1:NUMAX (usually sum(n)) nu.0.draw <- function(sigma2s, sigma2.0, alpha, NUMAX) { x <- 1:NUMAX lp <- nu.0.lfullcond(x, sigma2s, sigma2.0, alpha) p <- exp(lp - max(lp)) nu <- sample(x=x, size=1, prob=p) return(nu) } ## an example full conditional posterior mass for nu.0 nu.0s <- 1:N nu.0lp <- nu.0.lfullcond(nu.0s, rep(sigma2[40],m), sigma2.0, 2) nu.0p <- exp(nu.0lp-max(nu.0lp)) plot(nu.0s, nu.0p/sum(nu.0p), type="h", lwd=2, bty="n", xlab="nu.0", xlim=c(10,40), ylab="mass", main="") ## ## Now a Gibbs sampler with group-specific variances ## ## prior parameters for nu.0 and sigma2.0 alpha <- 1 a <- 1; b <- 1/100 ## allocate space for the samples S <- 5000 sigma2.2 <- mu.2 <- matrix(NA, nrow=S, ncol=m) nu.0 <- sigma2.0 <- psi.2 <- tau2.2 <- rep(NA, S) ## initial values of parameters mu.2[1,] <- ybar sigma2.2[1,] <- s2 psi.2[1] <- mean(mu[1,]) tau2.2[1] <- var(mu[1,]) nu.0[1] <- 10 sigma2.0[1] <- mean(s2) ## Gibbs sampling loop for(s in 2:S) { ## sample new values of the mus for(j in 1:m) { V.mu <- 1/(n[j]/sigma2.2[s-1,j] + 1/tau2.2[s-1]) m.mu <- V.mu * (ybar[j]*n[j]/sigma2.2[s-1,j] + psi.2[s-1]/tau2.2[s-1]) mu.2[s,j] <- rnorm(1, m.mu, sqrt(V.mu)) } ## sample a new value of sigma2 for(j in 1:m) { nu.n <- nu.0[s-1] + n[j] sigma2.s <- nu.0[s-1]*sigma2.0[s-1] sigma2.s <- sigma2.s + sum((Y[[j]] - mu[s,j])^2) sigma2.2[s,j] <- 1/rgamma(1, nu.n/2, sigma2.s/2) } ## sample a new value of psi V.psi <- 1/(m/tau2.2[s-1] + 1/g2.0) m.psi <- V.psi * (m*mean(mu.2[s,])/tau2.2[s-1] + psi.0/g2.0) psi.2[s] <- rnorm(1, m.psi, sqrt(V.psi)) ## sample a new value of tau2 eta.m <- eta.0 + m tau2.s <- eta.0*tau2.0 + sum((mu.2[s,] - psi.2[s])^2) tau2.2[s] <- 1/rgamma(1, eta.m/2, tau2.s/2) ## sample a new value of sigma2.0 sigma2.0[s] <- rgamma(1, a + m*nu.0[s-1]/2, b + nu.0[s-1]*sum(1/sigma2.2[s,])/2) ## sample a new value of nu.0 nu.0[s] <- nu.0.draw(sigma2.2[s,], sigma2.0[s], alpha, N) } ## posterior summaries of hierarchical parameters par(mfrow=c(2,2)) ## comparing psis plot(density(psi), main="", xlab="psi", bty="n", lwd=2) lines(density(psi.2), lwd=2, col=2, lty=2) legend("topright", c("= vars", "!= vars"), col=1:2, lwd=2, lty=1:2) ## comapring tau2s plot(density(tau2), main="", xlab="tau2", bty="n", lwd=2) lines(density(tau2.2), lwd=2, col=2, lty=2) ## plotting nu.0 r <- range(nu.0) nu.0s <- r[1]:r[2] nu.0p <- rep(0, length(nu.0s)) for(i in 1:length(nu.0s)) nu.0p[i] <- mean(nu.0 == nu.0s[i]) plot(nu.0s, nu.0p, type="h", lwd=2, bty="n", xlab="nu.0", ylab="mass", main="") ## plotting sigma2.0 plot(density(sigma2.0), main="", xlab="sigma2.0", bty="n", lwd=2) ## assessing shrinkage in the variances ## plotting the data variance versus the group posterior variance plot(s2, apply(sigma2.2, 2, mean), main="", bty="n") abline(0,1) ## ploting the group size versus the group posterior mean plot(n[o], (s2-apply(sigma2.2, 2, mean))[o], main="", bty="n") abline(h=0)