## code to accompany the Poisson example(s) from lectures ## 4,5, and 6 of PSTAT 215A, Bayesian Inference, UCSB ## ## Robert B. Gramacy adapted from Peter Hoff ## prior parameters a <- 2 b <- 1 ## data in group 1 n1 <- 111 sy1 <- 217 ## data in group 2 n2 <- 44 sy2 <- 66 ## comparing the posterior means data.frame(none=(a+sy1)/(b+n1), bach=(a+sy2)/(b+n2)) ## comparing the posterior modes data.frame(none=(a+sy1-1)/(b+n1), bach=(a+sy2-1)/(b+n2)) ## comparing the posetior 95% CIs q <- data.frame(none=qgamma(c(0.025, 0.975), a+sy1, b+n1), bach=qgamma(c(0.025, 0.975), a+sy2, b+n2)) row.names(q) <- c("2.5%", "97.5%") q ## plotting the prior and posteriors theta <- seq(0,5,length=1000) p1 <- dgamma(theta, a+sy1, b+n1) p2 <- dgamma(theta, a+sy2, b+n2) plot(theta, p1,xlab="theta", ylab="density", type="l", lwd=2, col=2, lty=2, bty="n") lines(theta, p2, lwd=2, col=3, lty=3) lines(theta, dgamma(theta, 2, 1), lwd=2, col=1, lty=1) legend("topright", c("prior", "post none", "post bach"), lwd=2, col=1:3, lty=1:3, bty="n") ## calculating the probability that theta_1 > theta_2 T <- 100000 t1.mc <- rgamma(T, a+sy1, b+n1) t2.mc <- rgamma(T, a+sy2, b+n2) mean(t1.mc > t2.mc) ## plot the density of the ratio of theta-1 / theta-2 plot(density(t1.mc/t2.mc), main="", xlab="theta1/theta2", ylab="density", bty="n", lwd=2) ## plotting the posterior predictives y <- 0:10 barplot(rbind(dnbinom(y, size=a+sy1, mu=(a+sy1)/(b+n1)), dnbinom(y, size=a+sy2, mu=(a+sy2)/(b+n2))), names=y, beside=TRUE, ylab="probability", xlab="children", args.legend=list(x="top", bty="n"), legend.text=c("none", "bach")) ## calculating p(y1-tilde > y2-tilde) y1t <- rnbinom(T, size=a+sy1, mu=(a+sy1)/(b+n1)) y2t <- rnbinom(T, size=a+sy2, mu=(a+sy2)/(b+n2)) mean(y1t > y2t) mean(y1t == y2t) ## now in full generality y1.mc <- rpois(T, t1.mc) y2.mc <- rpois(T, t2.mc) mean(y1.mc > y2.mc) ## MC approximation to difference in y_1 and y_2 y1my2.mc <- y1.mc - y2.mc r <- range(y1my2.mc) yd <- r[1]:r[2] ydd <- rep(NA, length(yd)) for(i in 1:length(yd)) { ydd[i] <- sum(y1my2.mc == yd[i]) } ydd <- ydd/length(y1my2.mc) ## plot the mass of the difference barplot(ydd, names=yd, beside=TRUE, bty="n", ylab="probability", xlab="d = y1-y2") ## predictive distribution model checking ## read in the actual data y1 <- scan("birth_edu_none.txt") ## calculate the empirical and predictive distributions ecdf<-(table(c(y1,0:9))-1 )/sum(table(y1)) ecdf.mc<- dnbinom(0:9,size=a+sum(y1),mu=(a+sum(y1))/(b+length(y1))) ## plot the distributions for comparison plot(0:9+.1,ecdf.mc,type="h",lwd=5, xlab="children", bty="n", ylab="P(Y = y)", col=1, ylim=range(c(ecdf, ecdf.mc))) points(0:9-.1, ecdf, lwd=5, col=2, type="h") legend("right", legend=c("predictive","empirical"), lwd=5, col=1:2, bty="n") ## simulating from the predictive distribution of the ratio ## of the number of women with two children to one t.mc <- rep(NA, T) for(t in 1:T) { theta1 <- rgamma(1, a+sy1, b+ n1) y1.mc <- rpois(n1, theta1) t.mc[t] <- sum(y1.mc==2)/sum(y1.mc==1) } ## the true ratio in the data (which should be 2) t.obs <- sum(y1==2)/sum(y1==1) ## plotting a historgram of the samples of ratios to ## see what t.obs = 2 lies hist(t.mc, prob=TRUE, main="", ylab="", xlab="t(Y-tilde)") segments(t.obs, 0, t.obs, .25, col=2, lwd=3)