#' Fit a ridge-penalised maximum-entropy (exponential tilting) model
#'
#' Minimises over \code{theta} the objective
#' \code{logSumExp(Xc \%*\% theta) + sum(lambda * alpha * theta^2) / 2},
#' where \code{Xc} is \code{X} with \code{target} subtracted from its
#' columns. Each iteration tries a Newton step with step-halving; if no
#' acceptable Newton step is found it tries a plain gradient step, and it
#' stops when neither makes progress.
#'
#' @param X Numeric matrix (or anything \code{as.matrix} accepts); a plain
#'   vector is treated as a single column.
#' @param target Values subtracted from the columns of \code{X}: one entry
#'   per column, or a single value used for every column.
#' @param lambda Scalar multiplier applied to \code{alpha}.
#' @param alpha Per-column penalty weights, one per column of \code{X}.
#' @param coef.init \code{NULL} to start from zero, or a vector of length
#'   \code{ncol(X) + 1} whose first element (the intercept slot) is ignored.
#' @param maxiter Maximum number of iterations.
#' @param tol Gradient tolerance (max-norm) used to stop the iterations; also
#'   caps the smallest step size tried by the line search.
#' @return A list with
#'   \item{w}{an n x 1 matrix of weights
#'     \code{exp(logit - logSumExp(logit))}, which sum to one;}
#'   \item{coefs}{\code{c(0, theta)}, the intercept slot is always 0;}
#'   \item{converged}{whether the max-norm of the gradient at the returned
#'     \code{theta} is below \code{1e-3};}
#'   \item{loss}{the objective at the returned \code{theta} without the
#'     penalty term.}
#' @export
maxent.ridge <- function(X,
                         target,
                         lambda,
                         alpha,
                         coef.init = NULL,
                         maxiter = 100,
                         tol = 1e-6) {
    X <- as.matrix(X)
    p <- ncol(X)
    if (p != length(alpha)) {
        stop("The number of columns of X should be the same as the length of alpha")
    }
    if (!(length(target) %in% c(1L, p))) {
        stop("target should have one entry per column of X (or be a single value)")
    }
    ## as.vector() strips names so they cannot leak into the returned coefs
    alpha <- as.vector(lambda * alpha)
    step.size.min <- min(tol, 1 / lambda)

    ## Feasibility of the hard constraints (columns with alpha == 0) is not
    ## checked. Disabled sketch of such a check, to be run on the uncentred X:
    ## hard.constraint <- which(alpha == 0)
    ## X.hard <- X[, hard.constraint, drop = FALSE]
    ## solvable <- lpSolve::lp("min",
    ##                objective.in = rep(0, n),
    ##                const.mat = rbind(diag(n), t(X.hard), rep(1, n)),
    ##                const.dir = c(rep(">", n), rep("=", length(hard.constraint) + 1)),
    ##                const.rhs = c(rep(0, n), target[hard.constraint], 1))
    ## if (solvable$status == 2) {
    ##     stop("The maxent problem is not feasible. Try use some very small alpha instead of 0.")
    ## }

    ## centre the columns at the target
    X <- t(t(X) - target)

    ## numerically stable log(sum(exp(v)))
    log.sum.exp <- function(v) {
        v.max <- max(v)
        if (!is.finite(v.max)) {
            return(v.max)
        }
        v.max + log(sum(exp(v - v.max)))
    }

    coef.to.logit <- function(theta) {
        X %*% theta
    }

    ## penalised objective; a = 0 switches the penalty off
    objective <- function(theta, a = 1) {
        log.sum.exp(coef.to.logit(theta)) + a * sum(alpha * theta^2) / 2
    }

    ## n x 1 matrix of normalised weights at theta
    tilt.weights <- function(theta) {
        logit <- coef.to.logit(theta)
        exp(logit - log.sum.exp(logit))
    }

    ## gradient of the penalised objective at theta, as a plain vector
    gradient.at <- function(theta, w = tilt.weights(theta)) {
        as.vector(crossprod(X, w)) + alpha * theta
    }

    ## Halve the step from 1 until the objective does not increase or the
    ## step falls to step.size.min. A non-finite objective counts as
    ## "not acceptable" instead of raising an error.
    backtrack <- function(theta, direction, obj.value) {
        step.size <- 1
        while (step.size > step.size.min &&
                   !isTRUE(objective(theta - step.size * direction) <= obj.value)) {
            step.size <- step.size / 2
        }
        step.size
    }

    ## initialize
    if (is.null(coef.init)) {
        theta <- rep(0, p)
    } else {
        theta <- as.vector(coef.init[-1])
        if (length(theta) != p) {
            stop("coef.init should have length ncol(X) + 1")
        }
    }

    ## Newton method
    for (iter in seq_len(maxiter)) {
        w <- tilt.weights(theta)
        tilted.mean <- crossprod(X, w) # p x 1
        gradient <- gradient.at(theta, w)
        if (max(abs(gradient)) < tol) {
            break
        }
        obj.value <- objective(theta)

        ## diag(alpha, nrow = p): plain diag(alpha) treats a length-one
        ## alpha as a matrix size, which breaks the single-column case
        hessian <- crossprod(X, X * as.vector(w)) -
            tcrossprod(tilted.mean) + diag(alpha, nrow = p)

        ## full Newton direction; if the Hessian cannot be inverted, scale
        ## the gradient by the Hessian diagonal instead
        newton.step <- tryCatch(
            as.vector(solve(hessian, gradient)),
            error = function(e) as.vector(gradient / diag(hessian))
        )

        direction <- newton.step
        step.size <- backtrack(theta, direction, obj.value)
        if (step.size <= step.size.min) {
            ## Newton direction made no progress: try steepest descent
            direction <- gradient
            step.size <- backtrack(theta, direction, obj.value)
            if (step.size <= step.size.min) {
                break
            }
        }
        theta <- theta - step.size * direction
    }

    ## Recompute everything at the returned theta so that `converged`
    ## is not based on the gradient from before the last update.
    w <- tilt.weights(theta)
    gradient <- gradient.at(theta, w)

    ## theta0 <- log(sum(expit)) # intercept
    theta0 <- 0

    list(w = w,
         coefs = c(theta0, as.vector(theta)),
         ## deliberately looser than `tol`
         converged = max(abs(gradient)) < 1e-3,
         loss = objective(theta, 0))
}

#' Build Fourier basis features with interactions and their penalty weights
#'
#' Every column of \code{X} is first transformed with \code{range01}
#' (defined elsewhere; presumably rescales to [0, 1] -- confirm). A Fourier
#' basis on [0, 1] is then evaluated on each column with the first basis
#' column dropped (the constant term, per the fda Fourier basis layout), and
#' the per-column bases are combined through a model formula with
#' interactions up to \code{interaction.degree}, without an intercept.
#'
#' @param X Matrix-like object, one covariate per column.
#' @param beta Exponent parameter: each feature's weight is
#'   \code{sqrt(sum(degree^(2 * beta)))}.
#' @param nbasis Number of Fourier basis functions requested from
#'   \code{fda::create.fourier.basis}.
#' @param interaction.degree Highest interaction order in the model formula.
#' @return A list with
#'   \item{features}{the model matrix of basis features;}
#'   \item{degree}{a list with one named numeric vector per feature column,
#'     holding the number parsed after "sin"/"cos" in each ":"-separated
#'     component of the column name (0 where no number could be parsed);}
#'   \item{alpha}{numeric vector of penalty weights, one per feature column.}
#' @export
get.basis <- function(X, beta = 2, nbasis = 31, interaction.degree = 2) {

    X <- apply(X, 2, range01) # standardize the X

    ## fda:: calls avoid attaching the package from inside a function
    phi <- fda::create.fourier.basis(c(0, 1), nbasis)
    ## NOTE: the name `uni.basis` appears verbatim in the formula text below
    ## and therefore in the column names of `features`; do not rename it.
    uni.basis <- lapply(seq_len(ncol(X)),
                        function(j) fda::eval.basis(X[, j], phi)[, -1])

    model.formula <- paste(paste0("uni.basis[[", seq_len(ncol(X)), "]]"),
                           collapse = " + ")
    model.formula <- paste0("~ (", model.formula, ")^", interaction.degree, " -1")
    ## as.formula() is called here so the formula sees `uni.basis`
    features <- model.matrix(as.formula(model.formula))

    ## number that follows "sin"/"cos" in one component of a column name
    parse.degree <- function(ss) {
        as.numeric(substr(ss, regexpr("cos|sin", ss) + 3, nchar(ss)))
    }
    degree <- lapply(colnames(features),
                     function(s)
                         vapply(unlist(strsplit(s, ":")), parse.degree, numeric(1)))
    ## components without a parsable number count as degree 0
    degree <- lapply(degree, function(x) {x[is.na(x)] <- 0; x} )
    alpha <- sqrt(vapply(degree, function(x) sum(x^(2 * beta)), numeric(1)))

    list(features = features,
         degree = degree,
         alpha = alpha)
}
