#' A wrapper that returns weights based on different propensity score estimates.
#' @export
#' @param class labels (0 or 1)
#' @param features covaraites to be used in propensity scoring
#' @param effect either "treated" (treatment effect over treated) or "overall"
#' @param method various propensity scoring methods
#' @param improved if true, use the IPW3 method described in the reference.
#'                 Only available if method = "overall".
#' @return a list of \describe{
#'   \item{model}{the model used.}
#'   \item{weights}{estimated weights}
#'   \item{propensity.score}{estimated propensity score}
#'   \item{method}{method used}
#' }
#' @references
#'   Lunceford & Davidian, Stratification and Weighting
#'   Via the Propensity Score in Estimation of
#'   Casual Treatment Effects: A Comparative Study
#'   2004
PS.weights <- function(class, features,
                       effect = c("treated", "overall"),
                       method =
                           c("glm", "glmnet", "gbm", "cbps", "ebal", "unif"),
                       improved = FALSE, ...) {

    effect <- match.arg(as.character(effect), c("treated", "overall"))
    method <- match.arg(as.character(method),
                        c("glm", "glmnet", "gbm", "cbps", "ebal", "unif"))

    n <- length(class)
    data <- data.frame(class = class, features = features)

    if (method %in% c("glm", "glmnet", "gbm", "cbps")) {

        # First estimate propensity scores
        if (method == "glm") {
            model <- glm(class ~ ., family = "binomial", data = data)
            propensity.score <- fitted(model)
        }
        else if (method == "glmnet") {
            require(glmnet)
            model <- cv.glmnet(x = features, y = class,
                               family = "binomial", ...)
            propensity.score <- plogis(predict(model, features, s = "lambda.min"))
        }
        else if (method == "gbm") {
            require(gbm)
            model <- gbm(class ~ ., data = data,
                         distribution = "bernoulli", ...)
            best.iter.oob <- gbm.perf(model, method = "OOB", plot.it = FALSE)
            prob <- predict(model, data, n.trees = 1:model$n.trees,
                            type = "response")
            propensity.score <- prob[, best.iter.oob]
        }
        else if (method == "cbps") {
            require(CBPS)
            model <- CBPS(class ~ ., data = data, ATT = TRUE)
            propensity.score <- model$fitted.values
        }

        # get weights by estimated propensity score
        weights <- rep(0, n)
        if (effect == "overall") {
            if (improved) {
                C1 = sum((class - propensity.score)/propensity.score) /
                    sum(((class - propensity.score)/propensity.score)^2)
                C0 = - sum((class - propensity.score)/(1 - propensity.score)) /
                    sum(((class - propensity.score)/(1 - propensity.score))^2)
            } else {
                C1 = 0
                C0 = 0
            }
            weights[class == 1] <- 1 / propensity.score[class == 1] *
                (1 - C1 / propensity.score[class == 1])
            weights[class == 0] <- 1 / (1 - propensity.score[class == 0]) *
                (1 - C0 / (1 - propensity.score[class == 0]))
        } else if (effect == "treated") {
            weights[class == 1] <- 1
            weights[class == 0] <- propensity.score[class == 0] /
                (1 - propensity.score[class == 0])
        } else {
            stop("No such effect!")
        }
    } else {
        # Non propensity score methods
        propensity.score = NULL
        if (method == "unif") {
            # uniform weight
            weights <- rep(1, n)
            model <- NULL
        } else if (method == "ebal") {
            # get weights by entropy balancing
            require(ebal)
            if (effect == "treated") {
                sink('/dev/null')
                # ebalance prints a message every time, this is quite annoying
                # this sink command removes that output
                model <- ebalance(class, features)
                sink()
                weights <- rep(0, n)
                weights[class == 0] <- model$w
                weights[class == 1] <- 1
            } else if (effect == "overall") {
                warning(paste("The two sample ebal is known to be inconsistent",
                              "for the overal treatment effect.",
                              "This option should only be used",
                              "in simulations demonstrating this."))
                sink('/dev/null')
                model.c <- ebalance(c(rep(1, n), class[class == 0]),
                                    rbind(features, features[class == 0, ]))
                model.t <- ebalance(c(rep(1, n), 1 - class[class == 1]),
                                    rbind(features, features[class == 1, ]))
                sink()
                weights <- rep(0, n)
                weights[class == 0] <- model.c$w
                weights[class == 1] <- model.t$w
                model <- list(model.c = model.c, model.t = model.t)
            } else {
                stop("No such effect!")
            }
        } else {
            stop("No such method!")
        }
    }

    # normalize weights
    weights[class == 1] <- weights[class == 1] / sum(weights[class == 1])
    weights[class == 0] <- - weights[class == 0] / sum(weights[class == 0])

    list(model = model,
         weights = weights,
         propensity.score = propensity.score,
         method = method)
}


#' A wrapper of \code{PS.weights}.
#'
#' @description This function can handle multiple methods,
#'              while \code{PS.weights} only takes one input method.
#' @export
#' @param data a data object
#' @param effect either "treated" (treatment effect over treated) or "overall"
#' @param method.list a list of weighting methods
#' @param stratified if the data is stratified
#'                   (\code{data} must have a field \code{stratum})
#' @param print.level print level of the function, either 0, 1 or 2.
#' @return a list of weights,
#'         each corresponds to one method in \code{method.list}.
get.all.weights <- function(data,
                            effect = c("treated", "overall"),
                            method.list = c("unif", "oracle", "glm",
                                "glmnet", "cbps", "ebal"),
                            stratified = FALSE,
                            print.level = 0) {

    # check arguments
    effect <- match.arg(as.character(effect), c("treated", "overall"))
    if (stratified) {
        if (!("stratum" %in% names(data))) {
            stop("The data list doesn't have a field named stratum.")
        }
    } else {
        data$stratum <- rep("0", nrow(data))
    }

    output <- list()
    weights <- list()

    for (stratum in levels(data$stratum)) {

        if (print.level >= 1) {
            print(paste("Stratum: ", stratum))
        }

        ind <- (data$stratum == stratum)
        T <- data$T[ind]
        features <- data$features[ind, ]

        for (method in method.list) {
            if (method != "oracle") {
                if (print.level >= 2) {
                    print(paste("Method: ", method))
                }
                output[[stratum]][[method]] <- PS.weights(T, features,
                                               method = method, effect = effect)
                weights[[method]][ind] <- output[[stratum]][[method]]$weights
            }
        }
    }

    if (!stratified) {
        output <- output[["0"]]
    }

    if ("oracle" %in% method.list) {
        # compute oracle weights based on the true propensity score
        method <- "oracle"
        T <- data$T
        features <- data$features
        output[[method]] <- list(logit.true = data$logit.true,
                                 propensity.score = plogis(data$logit.true),
                                 method = "oracle")
        if (effect == "treated"){
            weights[[method]] <- plogis((1 - T) * data$logit.true) /
                (1 - plogis((1 - T) * data$logit.true))
            weights[[method]][T == 0] <- - weights[[method]][T == 0] /
                sum(weights[[method]][T == 0])
            weights[[method]][T == 1] <- weights[[method]][T == 1] /
                sum(weights[[method]][T == 1])
        } else {
            weights[[method]] <- 1 / plogis((2 * T - 1) * data$logit.true)
            weights[[method]][T == 0] <- - weights[[method]][T == 0] /
                sum(weights[[method]][T == 0])
            weights[[method]][T == 1] <- weights[[method]][T == 1] /
                sum(weights[[method]][T == 1])

        }
    }

    return(list(output = output,
                weights = weights))
}

#' Get estimated causal effect by different weighting methods
#'
#' @export
#' @param data a data object, must have fileds named T and Y
#' @param weights a list or data frame.
#'                Each column contains weights on all the data points.
#' @param Y.name if data$Y has multiple columns,
#'               this specifies the column to be considered
#' @param individual.return if TRUE, returns
#'                          the individual estimate for the two groups
#' @param log if TRUE, use log(Y+1) instead of Y
#' @inheritParams get.all.weights
#' @return a vector of estimated causal effect
#' TODO(qyzhao): calculate the standard error of the estimates.
get.effect.by.weights <- function(data,
                                  weights,
                                  stratified = FALSE,
                                  Y.name = NULL,
                                  individual.return = FALSE,
                                  log = FALSE) {
    if (stratified) {
        if (!("stratum" %in% names(data))) {
            stop("The data list doesn't have a field named stratum.")
        }
    } else {
        data$stratum <- rep("0", nrow(data))
    }

    if (!is.null(Y.name)) {
        data$Y <- data$Y[[Y.name]]
    } else {
        Y.name <- "Y"
    }

    if (log) {
        data$Y <- log(data$Y + 1)
    }

    weights <- data.frame(weights)
    if (!individual.return) {
        df <- matrix(0, 0, ncol(weights))
        for (stratum in levels(data$stratum)) {
            ind <- (data$stratum == stratum)
            df <- rbind(df, colSums(weights[ind, ] * data$Y[ind]))
        }
    } else {
        df <- matrix(0, 0, ncol(weights) * 2)
        for (stratum in levels(data$stratum)) {
            ind <- (data$stratum == stratum)
            df <- rbind(
                df,
                # average over control
                c(colSums(- (1 - data$T[ind]) * weights[ind, ] * data$Y[ind]),
                # average over treatment
                  colSums(data$T[ind] * weights[ind, ] * data$Y[ind])))
        }
        colnames(df) <- apply(
            expand.grid(colnames(weights), c(paste(Y.name, c("0","1")))),
            1, paste, collapse = " ")
    }
    df <- data.frame(df)

    if (stratified) { # if stratified, add stratum column
        df <- cbind(levels(data$stratum), df)
        colnames(df)[1] <- "stratum"
    }

    return(df)
}
