## Demo: estimate the effect of city-level geo opportunities on cost uplift.
##
## Reads the geo opportunity experiment data and the two geo_id lookup files,
## summarizes the joined data to one row per hashed_customer_id, keeps the
## control users plus the users that have at least one city-level geo_id, and
## passes the result to get.effect.obdata() with a fixed list of methods.
##
## Takes no arguments. Prints the geo class counts, the treatment counts, the
## effect estimates and their standard errors, and invisibly returns the
## standard errors (optgeo.result$effect.est.se).
##
## Side effects: attaches gfile, cfs, covalign, data.table and plyr, and calls
## InitGoogle(). options(digits) is set to 3 only while the results are
## printed and is restored when the function exits.
demo_optgeo <- function() {

    library(gfile)
    library(cfs)
    library(covalign)
    library(data.table)
    library(plyr)
    InitGoogle()

    ## read the opportunities geo experiment data
    ## NOTE(review): an unnamed colClasses vector is recycled across columns,
    ## so c("character", NA) may not mean "first column character, the rest
    ## auto-detected" -- confirm the intent against the file's columns.
    optgeo.data <- read.csv(
        gfile('/cns/ie-d/home/dancsi/opt_geo_cities_simple_features-20140811.csv'),
        header = TRUE,
        colClasses = c("character", NA)
    )
    optgeo.data <- data.table(optgeo.data)

    ## There is also a bug in the data so that sometimes the geo_id
    ## in opt_geo_cities_simple_features-20140811.csv will be zero.
    ## This is still unfixed, I'm not sure what is causing it,
    ## but you should probably exclude these, or somehow treat them as unknown.
    ## Rows whose opt_applies_7_day_20140625 is the string "NULL" are dropped
    ## as well.
    optgeo.data <- subset(optgeo.data,
                          geo_id != 0 & opt_applies_7_day_20140625 != "NULL")

    ## The geo_id information is stored in two different csv files
    geo1 <- read.csv(
        gfile("/cns/ie-d/home/dancsi/geo_to_state_and_city1.csv"),
        header = FALSE)
    geo2 <- read.csv(
        gfile("/cns/ie-d/home/dancsi/geo_to_state_and_city2.csv"),
        header = FALSE)
    geo <- rbind(geo1, geo2)
    colnames(geo) <- c("geo_id", "region_id", "city_id")

    ## A geo_id is either at the regional (state) level or at the city level.
    ## In this experiment we want to compare the cost uplift
    ## of users exposed to different geo opportunities.
    ## Count how many of region_id / city_id are non-zero:
    ##   1 -> state-level id, 2 -> city-level id.
    ## This is computed on the columns directly (row-wise apply() would coerce
    ## the data frame to a matrix first), and the labels are tied to the
    ## values explicitly rather than to whichever levels happen to occur.
    n.ids.set <- (geo$region_id != 0) + (geo$city_id != 0)
    if (any(n.ids.set == 0, na.rm = TRUE)) {
        warning("Some geo_ids have neither a region_id nor a city_id; ",
                "their class is set to NA.")
    }
    geo$class <- factor(n.ids.set, levels = c(1, 2),
                        labels = c("state", "city"))
    ## print() is required: a bare expression is not auto-printed inside a
    ## function body.
    print(table(geo$class))

    ## Now join geo with optgeo.data
    data <- merge(optgeo.data, geo, by = "geo_id")

    ## For this experiment, we want to estimate
    ## E[Y(treatment)|would apply city suggestion] -
    ##   E[Y(control)|would apply city suggestion]
    ## The latter is counterfactual.
    ## For more details, see demo_hedwig.R

    ## Before applying get.effect, first we need to summarize the data by user
    customer_id_col <- which(names(data) == "hashed_customer_id")
    feature_cols <- grep(paste0("spend_7_day|number_of_cm_tab_visits|",
                                "opt_applies|opt_clicks|_opt_views"),
                         names(data))
    experiment_col <- grep("experiment_name", names(data))
    data.features <- data[, c(customer_id_col, feature_cols, experiment_col),
                          with = FALSE]
    ## keep the first row seen for each customer
    data.features <- data.features[!duplicated(data.features$hashed_customer_id), ]
    ## TRUE when the customer has at least one city-level geo_id row
    ## (rows whose class is NA are ignored)
    data.class <- ddply(data, .(hashed_customer_id), summarize,
                        applied.city.sugg = any(class == "city", na.rm = TRUE))
    ## total cost uplift per customer
    data.cost <- ddply(data, .(hashed_customer_id), summarize,
                       cost_uplift_total = sum(cost_uplift))
    if (nrow(data.features) != nrow(data.cost)) {
        warning("Dimensions don't match! Some summarization step is wrong.")
    }
    data <- merge(data.cost, data.features)
    data <- merge(data, data.class)

    ## remove treated & no city applied
    ## NOTE(review): this assumes experiment_name is a factor with exactly
    ## five levels, the first four being control arms and the fifth the
    ## treatment arm -- confirm against the data (and note that read.csv no
    ## longer creates factors by default since R 4.0).
    levels(data$experiment_name) <- c(rep("CONTROL", 4), "EXP")
    data <- subset(data, experiment_name == "CONTROL" | applied.city.sugg)

    ## Now transform into obdata:
    ##   T        - 0 for CONTROL, 1 for EXP
    ##   features - log2(x + 1) of every column whose name contains "20140625"
    ##   Y        - total cost uplift per customer
    ## NOTE(review): data.matrix() turns character/factor columns into integer
    ## codes; opt_applies_7_day_20140625 is compared with the string "NULL"
    ## above, so it may not be numeric here -- confirm.
    is.feature <- grepl("20140625", names(data))
    data <- list(T = as.numeric(data$experiment_name) - 1,
                 features = log(data.matrix(
                     data[, is.feature, drop = FALSE]) + 1, 2),
                 Y = data[, "cost_uplift_total"])
    print(table(data$T))

    ## There are only 147 treated units, maybe not a great idea to stratify the data

    ## Let's use get.effect directly
    method.list <- c("unif,nr,none,none",
        "glm,nr,none,none", "ebal,nr,none,none", "cbps,nr,none,none",
        "glm,nr,lm,dr", "ebal,nr,lm,dr", "cbps,nr,lm,dr",
        "glm,nr,wls,none", "ebal,nr,wls,none", "cbps,nr,wls,none",
        "glm,pop,lm,dr","ebal,pop,lm,dr", "cbps,pop,lm,dr")
    optgeo.result <- get.effect.obdata(data, stratified = FALSE, effect = "ATE",
                                       method.list = method.list, print.level = 2)

    ## Show the results with 3 significant digits, then put the caller's
    ## digits setting back instead of changing it for the rest of the session.
    old.options <- options(digits = 3)
    on.exit(options(old.options), add = TRUE)
    print(optgeo.result$effect.est)
    print(optgeo.result$effect.est.se)

    ## Same return value as before; invisible so that a top-level call does
    ## not print it a second time at the restored precision.
    invisible(optgeo.result$effect.est.se)

}
