# Create data n <- 100 x1 <- rnorm(n) x2 <- x1 + 0.5*rnorm(n) x1 <- (x1 - mean(x1))/sd(x1) x2 <- (x2 - mean(x2))/sd(x2) x <- cbind(x1, x2) # plot data (on equal scales) library(MASS) eqscplot(x1, x2) # SVD of x svd_x <- svd(x) str(svd_x) (svd_x$v) # Note how they are proportional to (1, 1) and (1, -1) (exactly). This is because of # the scaling of the columns # Add lines corresponding to the (loading vectors of the) principal comps abline(0, 1) abline(0, -1) # Create responses sigma <- 5 errors <- sigma*rnorm(n) errors <- errors - mean(errors) sig_easy <- sqrt(n)*svd_x$u[, 1] sig_hard <- sqrt(n)*svd_x$u[, 2] sig_med <- (sig_easy + sig_hard)/sqrt(2) y_easy <- sig_easy + errors y_med <- sig_med + errors y_hard <- sig_hard + errors # Perform ridge lambda_max <- 200 nlambda <- 101 lambda <- seq(from=0, to=lambda_max, length.out=nlambda) lambda_mat <- rep(lambda, each = 2) dim(lambda_mat) <- c(2, nlambda) U <- svd_x$u U_t <- t(U) V <- svd_x$v D_lambda_inv <- svd_x$d^2/(svd_x$d^2 + lambda_mat) # 2 by nlambda matrix coef_easy <- V %*% (D_lambda_inv * as.numeric((U_t %*% y_easy))) fit_easy <- U %*% (D_lambda_inv * as.numeric(U_t %*% y_easy)) fit_med <- U %*% (D_lambda_inv * as.numeric(U_t %*% y_med)) fit_hard <- U %*% (D_lambda_inv * as.numeric(U_t %*% y_hard)) # Plot Coefficients matplot(main="Coefficients", ylab=expression(hat(beta)), xlab=expression(lambda), x=lambda, y=t(coef_easy), type="l") # Prediction error nreps <- 50000 out_mat_easy <- out_mat_hard <- out_mat_med <- matrix(nrow=nlambda, ncol=nreps) for (i in seq_len(nreps)) { errors <- sigma*rnorm(n) errors <- errors - mean(errors) sig_easy <- sqrt(n)*svd_x$u[, 1] sig_hard <- sqrt(n)*svd_x$u[, 2] y_easy <- sig_easy + errors y_med <- sig_med + errors y_hard <- sig_hard + errors fit_easy <- U %*% (D_lambda_inv * as.numeric(U_t %*% y_easy)) fit_med <- U %*% (D_lambda_inv * as.numeric(U_t %*% y_med)) fit_hard <- U %*% (D_lambda_inv * as.numeric(U_t %*% y_hard)) out_mat_easy[, i] <- colMeans((fit_easy - sig_easy)^2) out_mat_med[, i] <- colMeans((fit_med - sig_med)^2) out_mat_hard[, i] <- colMeans((fit_hard - sig_hard)^2) } # Plot performance op <- par(mfrow=c(2, 3)) mycol2 <- rgb(0, 0, 0, max = 255, alpha = 70) # Easy eqscplot(x1, x2, xlab="", ylab="", col=mycol2) # Add lines corresponding to the (loading vectors of the) principal comps abline(0, 1) abline(0, -1) # Add lines corresponding to contours of regression function sapply(seq(-20*sqrt(2), 20*sqrt(2), by=sqrt(2)), function(i) abline(i, -1, lty=3)) # Med eqscplot(x1, x2, xlab="", ylab="", col=mycol2) # Add lines corresponding to the (loading vectors of the) principal comps abline(0, 1) abline(0, -1) # Add lines corresponding to contours of regression function sapply(seq(-20, 20, by=1), function(i) abline(i, 0, lty=3)) # Hard eqscplot(x1, x2, xlab="", ylab="", col=mycol2) # Add lines corresponding to the (loading vectors of the) principal comps abline(0, 1) abline(0, -1) # Add lines corresponding to contours of regression function sapply(seq(-20*sqrt(2), 20*sqrt(2), by=sqrt(2)), function(i) abline(i, 1, lty=3)) # light grey transparent mycol <- rgb(0, 0, 0, max = 255, alpha = 20) #Easy plot(main="", ylab="MSPE", xlab=expression(lambda), x=lambda, y=rowMeans(out_mat_easy), type="l") rect(0, 0, 200, 1, col=mycol, lty=0) # Med y_med_plot <- rowMeans(out_mat_med) plot(main="", ylab="MSPE", xlab=expression(lambda), x=lambda, y=y_med_plot, type="l") rect(0, 0, max(lambda[y_med_plot <= y_med_plot[1]]), 1, col=mycol, lty=0) # Hard y_hard_plot <- rowMeans(out_mat_hard) plot(main="", ylab="MSPE", xlab=expression(lambda), x=lambda, y=y_hard_plot, type="l") rect(0, 0, max(lambda[y_hard_plot <= y_hard_plot[1]]), 1, col=mycol, lty=0) par(op)