demo_hedwig <- function() {
    ## Hedwig is a new alert system in the advertiser platform with both
    ## front-end and back-end changes. The difference is mostly UI effect,
    ## but there are interactions due to logic of ranking, viewing and
    ## dismissing alerts. Basically the old alert system displays all the
    ## alerts flatly before the data table, and the new hedwig system uses a
    ## alert button on the menu bar with a one line summary pointing to the
    ## button.

    ## Before Adjustment

    ## In spite of the randomized experiment design, the treatment and
    ## control population may not be comparable because we only want to
    ## consider customers with certain impression. In this example, we want
    ## to compare the treated and control customers who both had creative
    ## disapproval alert impression in the experiment week of 1/19/2014.

    hedwig.data <- read.csv(
        gfile('/cns/ie-d/home/dancsi/hedwig_opt_cda_features_20140730.csv'),
        header = TRUE)
    hedwig.data <- data.table(hedwig.data)

    # a subsample of the data
    set.seed(1)
    hedwig.data <- hedwig.data[sample(1:nrow(hedwig.data), nrow(hedwig.data)/3),]

    ## The next code chunk computes the average weekly log spend and its
    ## confidence interval (by 25 jackknife samples) in each customer
    ## id mod group.
    data <- subset(hedwig.data, has_creative_disapproved_impression == "True")
    dt.jackknife.interface <- function(x, dim="", com="difference", cal="mean") {
        res <- Jackknife.pair.bucketed(x, x * 0, 25, com, cal)
        res <- sapply(data.frame(dim, t(res)), list)
        names(res) <- c("metric.ci", "ci.low", "ci.mid", "ci.up")
        return(res)
    }
    hedwig.mean.ci <-
        rbind(data[, dt.jackknife.interface(log(spend_7_day_usd_20131208 + 1),
                                            "7_day_spend_usd__20131208 (before experiment)"),
                   by = list(experiment_arm, customer_id_mod_percentage_group)],
              data[, dt.jackknife.interface(log(spend_7_day_usd_20140119 + 1),
                                            "7_day_spend_usd__20140119 (during experiment)"),
                   by = list(experiment_arm, customer_id_mod_percentage_group)]
              )
    p1 <- ggplot(hedwig.mean.ci) +
        aes(x = customer_id_mod_percentage_group, y = ci.mid,
            ymin = ci.low, ymax = ci.up, color = experiment_arm) +
                geom_point() + geom_errorbar() + facet_wrap(~metric.ci)
    p1 <- p1 + ggtitle(expression(atop("Log spend comparisons for Hedwig",
        atop(italic("Had creative disapproval alert impression"), "")))) +
            labs(y="Mean of Log Spend") +
                theme(text=element_text(size=20),
                      axis.text.x=element_text(angle = 45, hjust=1),
                      strip.text.x=element_text(size=12)) +
                          scale_y_continuous(labels = comma)
    p1

    ## The results shows that without any
    ## adjustment, the treatment arm has higher spend than the control arm
    ## both before and during the experiment period, which suggests that the
    ## two populations are not comparable.

    ## After adjustment
    ## First, I exclude the first 10\% of the customer because they have longer
    ## exposure to the treatment (from December).
    hedwig.data.subset <-
        subset(hedwig.data, as.numeric(customer_id_mod_percentage_group) >= 2)


    ## Next, the adjustment is based on the entire control population and
    ## treated and impressed population.
    hedwig.data.subset <- subset(hedwig.data.subset,
                                 (experiment_arm == "control" |
                                      has_creative_disapproved_impression == "True"))
    hedwig.data.subset <- data.frame(hedwig.data.subset)

    ## Reorganize the data structure, features are log of weekly
    ## spend, campaign management tab visits, and opportunities tab visits
    ## prior to the experiment.
    features.col <- grepl("2013|20140105|20140112", names(hedwig.data.subset))
    data <- list(T = as.numeric(hedwig.data.subset$experiment_arm) - 1,
                 features =
                     log(data.matrix( hedwig.data.subset[, features.col] ) + 1, 2),
                 Y = log(hedwig.data.subset[, "spend_7_day_usd_20140119"]+1))

    ## What transformation of covaraites should we use in the PS and outcome
    ## regression model? We explore this through the following marginal plots.
    data.df <- data.frame(data)
    large.spend.ind <- which(data.df$features.spend_7_day_usd_20140112 > 15)
    low.spend.ind <- which(data.df$features.spend_7_day_usd_20140112 <= 15)
    loess.model.spend <- loess(
        T ~ features.spend_7_day_usd_20140112,
        data = data.df[c(large.spend.ind, sample(low.spend.ind, 10000)),])
    loess.model.spend <- data.frame(loess.model.spend$x,
                                    fitted = loess.model.spend$fitted)
    p1 <- ggplot(data.df) + aes(x = features.spend_7_day_usd_20140112, y = T) +
        geom_point(alpha = 0.2) +
            geom_line(aes(x = features.spend_7_day_usd_20140112, y = fitted),
                      data = loess.model.spend)
    p1.logit <- ggplot(loess.model.spend) +
        aes(x = features.spend_7_day_usd_20140112, y = qlogis(fitted)) +
            geom_line()

    large.cm.ind <- which(data.df$features.number_of_cm_tab_visits_20140112 > 6)
    low.cm.ind <- which(data.df$features.number_of_cm_tab_visits_20140112 <= 6)
    loess.model.cm <- loess(
        T ~ features.number_of_cm_tab_visits_20140112,
        data = data.df[c(large.cm.ind, sample(low.cm.ind, 10000)),])
    loess.model.cm <- data.frame(loess.model.cm$x,
                                 fitted = loess.model.cm$fitted)
    p2 <- ggplot(data.df) +
        aes(x = features.number_of_cm_tab_visits_20140112, y = T) +
            geom_point(alpha = 0.2) +
                geom_line(aes(x = features.number_of_cm_tab_visits_20140112,
                              y = fitted),
                          data = loess.model.cm)
    p2.logit <- ggplot(loess.model.cm) +
        aes(x = features.number_of_cm_tab_visits_20140112, y = qlogis(fitted)) +
            geom_line()

    large.opt.ind <- which(data.df$features.number_of_opt_tab_visits_20140112 >= 3)
    low.opt.ind <- which(data.df$features.number_of_opt_tab_visits_20140112 < 3)
    loess.model.opt <- loess(
        T ~ features.number_of_opt_tab_visits_20140112,
        data = data.df[c(large.opt.ind, sample(low.opt.ind, 10000)),])
    loess.model.opt <- data.frame(loess.model.opt$x,
                                  fitted = loess.model.opt$fitted)
    p3 <- ggplot(data.df) +
        aes(x = features.number_of_opt_tab_visits_20140112, y = T) +
            geom_point(alpha = 0.2) +
                geom_line(aes(x = features.number_of_opt_tab_visits_20140112,
                              y = fitted),
                          data = loess.model.opt)
    p3.logit <- ggplot(loess.model.opt) +
        aes(x = features.number_of_opt_tab_visits_20140112,
            y = qlogis(fitted)) + geom_line()

    grid.arrange(p1, p1.logit, p2, p2.logit, p3, p3.logit)


    ## Similar plots for outcome
    pp1 <- ggplot(subset(data.df, T == 0)) +
            aes(x = features.spend_7_day_usd_20140112, y = Y) +
                geom_point(alpha = 0.1) + geom_smooth()
    pp2 <- ggplot(subset(data.df, T == 0)) +
        aes(x = features.number_of_cm_tab_visits_20140112, y = Y) +
            geom_point(alpha = 0.1) + geom_smooth()
    pp3 <- ggplot(subset(data.df, T == 0)) +
        aes(x = features.number_of_opt_tab_visits_20140112, y = Y) +
            geom_point(alpha = 0.1) + geom_smooth()
    grid.arrange(pp1, pp2, pp3)


    ## From these plots, we should add quadratic terms of CM and OPT tab visits.
    tmp <- data.frame(
        quad = data$features[, grep("cm_tab_visits|opt_tab_visits",
            colnames(data$features))]^2)
    data$features <- data.frame(cbind(data$features, tmp))

    ## Since the spend has such a large range (from 0 to 2.5 million), it is
    ## difficult to use a single propensity score model linear in the
    ## features. Instead, I stratify the data by pre-experiment spend. Note
    ## that the highest stratum has three times wider range than the others
    ## in log scale.
    data$stratum <- cut(data$features[, "spend_7_day_usd_20131103"],
                        c(seq(0, 14, by = 2), Inf),
                        right = FALSE)
    table(data$stratum)

    ## Now we can estimate the treatment effect in each stratum, by setting
    ## stratified to TRUE in function get.all.weights.
    method.list = c("unif,nr,none,none",
        "glm,nr,none,none", "ebal,nr,none,none",
        "glm,nr,lm,dr", "ebal,nr,lm,dr")
    hedwig.result <- get.effect.obdata(data, stratified = TRUE, effect = "ATT",
                                       method.list = method.list, print.level = 0)
    options(digits=3)
    print(hedwig.result$effect.est)

    ## Another interesting thing to see is what will happen
    ## if we use fewer features.
    ## Here we only use the 20140112 week features.
    data2 <- data
    data2$features <- data$features[, grep("20140112", names(data$features))]
    hedwig.result2 <- get.effect.obdata(data2, stratified = TRUE, effect = "ATT",
                                        method.list = method.list, print.level = 0)
    print(hedwig.result2$effect.est)
    rm(hedwig.result2)

    ## to get standard errors, we can use bootstrap
    B <- 50
    effect.est.bs <- array(0, c(B, length(levels(data$stratum)),
                                length(method.list)))
    n <- length(data$stratum)
    for (b in 1:B) {
        s <- sample(1:n, n, replace = TRUE)
        effect.est.bs[b, , ] <-
            get.effect.obdata(list(T = data$T[s],
                                   Y = data$Y[s],
                                   features = data$features[s, ],
                                   stratum = data$stratum[s]),
                              effect = "ATE",
                              stratified = TRUE,
                              method.list = method.list,
                              print.level = 0)$effect.est
    }
    effect.est.se <- apply(effect.est.bs, c(2, 3), sd, na.rm = TRUE)
    effect.est <- hedwig.result$effect.est
    rownames(effect.est.se) <- rownames(effect.est)
    colnames(effect.est.se) <- colnames(effect.est)

    ## Now plot the estimated causal effect with error bar
    df.est <- melt(effect.est)
    df.se <- melt(effect.est.se)
    df <- data.frame(stratum = df.est$X1, method = df.est$X2,
                     est = df.est$value, se = df.se$value)
    df$stratum <- factor(df$stratum, levels(data$stratum))
    p <- ggplot(df) + aes(x = stratum, y = est) +
        geom_point(aes(group = method, color = method), shape = 21, size = 3) +
            geom_errorbar(aes(ymax = est + 2 * se,
                              ymin = est - 2 * se,
                              group = method, color = method))
    p <- p + theme(text = element_text(size = 15)) +
        ggtitle("Hedwig Experiment After Adjustment") +
            ylab("difference in log(spend)") + xlab("Stratum by log2(previous week spend)")
    p

    ## Propensity Score Model Diagnostics
    diagnostics.object <- hedwig.result$output[[3]][[4]]
    ps.model <- diagnostics.object$T.model$model

    ## Hosmer and Lemeshow goodness of fit test
    library(ResourceSelection)
    hl <- hoslem.test(ps.model$data$class, fitted(ps.model), g = 15)
    hl

    ## Residual vs. x plot
    plot.data <- data.frame(
        y = residuals(ps.model, type = "pearson"),
        x = ps.model$data$features.spend_7_day_usd_20140112,
        t = factor(ps.model$data$class))
    ggplot(plot.data) + aes(x = x, y = y, color = t) + geom_point(alpha = 0.5) +
        ylab("Pearson Residuals") + xlab("log2 spend_7_day_usd_20140112") +
            ggtitle("Diagnostic Plot for PS model") + geom_smooth(color = "black") +
                geom_smooth(data = subset(plot.data, t == 0)) +
                    geom_smooth(data = subset(plot.data, t == 1))

    ## ROC curve
    library(ROCR)
    prob <- prediction(fitted(ps.model), ps.model$data$class)
    tprfpr <- performance(prob, "tpr", "fpr")
    plot(tprfpr)
    abline(a=0, b=1, lty=2)

}
