## code to accompany the diabetes model selection example ## from lecture 15 of PSTAT 215A, Bayesian Inference, UCSB ## ## Robert B. Gramacy, adapted from Peter Hoff ## load necessary libraries & data library(monomvn) data(diabetes) attach(diabetes, warn.conflicts=FALSE) ## x contains the regression coefficients -- it has been ## pre-standardized to have a unit L2 norm ## form the full design matrix of covariates with main effects ## quadratic terms and interactions X <- cbind(x, x[,c(1, 3:10)]^2) cnames <- colnames(x) colnames(X)[11:19] <- paste(cnames[-2], "2", sep="") nc <- 19 for(j in 1:(ncol(x)-1)) { for(k in (j+1):ncol(x)) { nc <- nc+1 X <- cbind(X, x[,j]*x[,k]) colnames(X)[nc] <- paste(cnames[j], cnames[k], sep=".") } } ## ## model selection by AIC ## ## start with the biggest possible model fit <- lm(y~., data=as.data.frame(X)) summary(fit) ## use step (via BIC) fit.step <- step(fit, scope=list(upper=~., lower=~1), k=log(nrow(X)), direction="both", trace=FALSE) summary(fit.step) ## ## now the Bayesian version ## ## fit the model using a "ridge" prior for stability bfit <- bridge(X, y, T=5000, mprior=c(0.5, 0.5)) ## plot the posterior regression coefficients plot(bfit) plot(bfit, "m") ## extract the summary s <- summary(bfit) names(s$bn0) <- colnames(X) sort(s$bn0, decreasing=TRUE)[1:length(coef(fit.step))] ## extract the MAP m <- which.max(bfit$lpost) bmap <- bfit$beta[m,] names(bmap) <- colnames(X) bmap[bmap > 0]