## code to accompany the NELS math example from ## lecture 17 of PSTAT 215A, Bayesian Inference, UCSB ## ## Robert B. Gramacy, adapted from Peter Hoff ## load libraries and other files library(mvtnorm) library(coda) source("rwish.R") ## read in the data file math <- read.table("nels_math.txt", header=TRUE) ## allocate space to hold the data and summary ## statistics in a more useful structure XtX <- X <- Y <- list() schools <- unique(math$school) m <- length(unique(schools)) n <- ybar <- ymed <- s2 <- rep(NA, m) ols <- matrix(NA, nrow=m, ncol=3) ## populate structures with the data from the file for(j in 1:m) { school <- math$school == schools[j] Y[[j]] <- math$score[school] ybar[j] <- mean(Y[[j]]) n[j] <- length(Y[[j]]) s2[j] <- var(Y[[j]]) xj <- math$ses[school] xj <- xj - mean(xj) X[[j]] <- cbind(rep(1,n[j]), xj) XtX[[j]] <- t(X[[j]]) %*% X[[j]] fit.lm <- lm(Y[[j]]~X[[j]]-1) ols[j,] <- c(coef(fit.lm), summary(fit.lm)$sigma^2) } N <- sum(n) ## plot the OLS regression lines for each school plot( range(math$ses),range(math$score), bty="n", type="n",xlab="SES", ylab="math score") for(j in 1:m) abline(ols[j,1:2],col="gray") abline(mean(ols[,1]), mean(ols[,2]), lwd=2) ## plot the intercept versus sample size plot(n, ols[,1], xlab="sample size", ylab="intercept", bty="n") abline(h=mean(ols[,1]), lwd=2) ## plot the slope versus sample size plot(n, ols[,2], xlab="sample size", ylab="slope", bty="n") abline(h=mean(ols[,2]), lwd=2) ## ## time for Gibbs Sampling inference ## ## set up (Empirical Bayes/unit information) priors ## following Hoff mu.0 <- apply(ols[,-3], 2, mean) Lambda.0 <- cov(ols[,-3]) Lambdai.0 <- solve(Lambda.0) S.0 <- Lambda.0 Si.0 <- Lambdai.0 eta.0 <- 4 s2.0 <- mean(ols[,3]) nu.0 <- 1 ## allocate space for MCMC samples via GS S <- 10000 beta <- array(NA, dim=c(m,S,2)) beta.0 <- matrix(NA, nrow=S, ncol=2) Sigma.0 <- array(NA, dim=c(S,2,2)) sigma2 <- rep(NA, S) ## initial values for(j in 1:m) beta[j,1,] <- ols[j,-3] beta.0[1,] <- mu.0 sigma2[1] <- s2.0 Sigma.0[1,,] <- S.0 ## Gibbs sampling rounds -- takes about an hour for(s in 2:S) { ## sample each beta[j,] Sigmai.0 <- solve(Sigma.0[s-1,,]) Sib <- Sigmai.0 %*% beta.0[s-1,] for(j in 1:m) { Sigma.n <- solve(Sigmai.0 + XtX[[j]]/sigma2[s-1]) beta.n <- drop(Sigma.n %*% (Sib + t(X[[j]]) %*% Y[[j]]/sigma2[s-1])) beta[j,s,] <- rmvnorm(1, beta.n, Sigma.n) } ## sample beta.0 Lambda.m <- solve(Lambdai.0 + m*Sigmai.0) bbar <- apply(beta[,s,], 2, mean) mu.m <- Lambda.m %*% (Lambdai.0 %*% mu.0 + m*Sigmai.0 %*% bbar) beta.0[s,] <- rmvnorm(1, mu.m, Lambda.m) ## sample Sigma.0 S.beta0 <- matrix(0, nrow=2, ncol=2) RSS <- 0 for(j in 1:m) { ## calculate S.beta0 for Sigma.0 and RSS for sigma2 S.beta0 <- S.beta0 + (beta[j,s,] - beta.0[s,]) %*% t(beta[j,s,] - beta.0[s,]) RSS <- RSS + sum((Y[[j]] - X[[j]] %*% beta[j,s,])^2) } Sigma.0[s,,] <- solve(rwish(eta.0 + m, solve(S.0 + S.beta0))) ## sample sigma2 sigma2[s] <- 1/rgamma(1, (nu.0 + N)/2, (nu.0*s2.0 + RSS)/2) } ## calculate effective sample sizes to check for good MCMC effectiveSize(beta.0[-(1:1000),2]) ## plot a comparison of beta.0[,2] versus beta[,2] for ## a hypothetical new school plot(density(beta.0[-(1:1000),2]), xlim=c(-6,8), main="", bty="n", lwd=2, xlab="slope parameter", ylab="posterior density") ## compare to a new sample beta beta.new <- matrix(NA, nrow=S, ncol=2) for(s in 1:S) beta.new[s,] <- rmvnorm(1, beta.0[s,], Sigma.0[s,,]) lines(density(beta.new[-(1:1000),2]), lwd=2, lty=2, col=2) legend("left", c("beta.0[,2]", "beta.new[,2]"), lwd=2, lty=1:2, col=1:2, bty="n") ## posterior probability that beta.new[,2] is negative mean(beta.new[,2] < 0) ## plot mean of each beta plot( range(math$ses),range(math$score), bty="n", type="n",xlab="SES", ylab="math score") for(j in 1:m) abline(apply(beta[j,-(1:1000),], 2, mean), col="gray") abline(apply(beta.0[-(1:1000),], 2, mean), lwd=2) ## save an image so we don't have to re-run this long MCMC save.image(file="nels_math_hierr.Rsave")