#' Copula regression models with semiparametric additive hazards margins for bivariate interval censoring and semi-competing risks.
#'
#' @description Fits a copula model with semiparametric additive hazards marginsfor bivariate interval censoring and semi-competing risks.
#'
#' @name copula_additive
#' @aliases copula_additive
#' @param data a data frame; must have \code{id} (subject id), \code{visit} (the visit number for the subject),
#' \code{visit_time} (the time for each visit in years), \code{status} (the disability status at visit_time, 1 for disability, 0 for non-disability),
#' \code{statusD} (the death status at visit_time, 1 for dead, 0 for alive), and \code{weight} sampling weight and each subject received the same weight across visits.
#' @param var_list the list of covariates to be fitted into the copula model.
#' @param l1 Left boundary of event 1 time interval.
#' @param u1 Right boundary of event 1 time interval.
#' @param m1 Degree of Bernstein polynomial for event 1.
#' @param l2 Left boundary of event 2 time interval.
#' @param u2 Right boundary of event 2 time interval.
#' @param m2 Degree of Bernstein polynomial for event 2.
#' @param method optimization method (see \code{?optim}); default is \code{"BFGS"};
#' also can be \code{"Newton"} (see \code{?nlm}).
#' @param iter number of iterations when \code{method = "Newton"};
#' default is 300.
#' @param stepsize size of optimization step when method is \code{"Newton"};
#' default is 1e-6.
#' @param control a list of control parameters for methods other than \code{"Newton"};
#' see ?optim.
#' @param hes default is \code{TRUE} for hessian calculation;
#' if LRT is desired, can set \code{hes = FALSE} to save time.
#' @param eta_ini Optional initial values for copula parameters.
#' @importFrom corpcor pseudoinverse
#' @importFrom stats pchisq
#' @importFrom stats optim
#' @importFrom stats constrOptim
#' @importFrom stats rexp
#' @importFrom stats runif
#' @importFrom copula normalCopula
#' @importFrom stats nlm

#' @export


#' @source
#' Tao Sun, Huiping Zheng, and Xiaojun Wang (2025+).
#' Decomposing Differences in Cohort Health Expectancy by Cause and Age with Longitudinal Data.
#' Under review. \cr
#'
#'
#' @details must have \code{id} (subject id), \code{visit} (the visit number for the subject),
#' \code{visit_time} (the time for each visit in years), \code{status} (the disability status at visit_time, 1 for disability, 0 for non-disability),
#' \code{statusD} (the death status at visit_time, 1 for dead, 0 for alive),
#' and \code{weight} sampling weight and each subject received the same weight across visits. \cr
#'
#'
#' Optimization methods can be all methods (except \code{"Brent"}) from \code{optim}, such as
#' \code{"Nelder-Mead"}, \code{"BFGS"}, \code{"CG"}, \code{"L-BFGS-B"}, \code{"SANN"}.
#' Users can also use \code{"Newton"} (from \code{nlm}).
#'
#' @return a \code{LongDecompHE} object summarizing the model.
#' Can be used as an input to general \code{S3} methods including
#' \code{summary}, \code{print}, \code{plot},
#' \code{coef}, \code{logLik}, \code{AIC}.
#' @examples
#' # Fit a Copula2-Semiparametric additive hazards model
#'data(simulated_dataA)
#'u1 = u2 = max(simulated_dataA$visit_time)
#'var_list = c("Z1", "Z2", "Z3")
#'copula_additive_model <-  copula_additive(data = simulated_dataA,
#'                                         var_list = var_list,
#'                                         l1=0, u1 = u1, m1 = 3,
#'                                         l2=0, u2 = u2, m2 = 3,
#'                                         method = "combined", iter=1000,
#'                                         stepsize=1e-6,
#'                                         hes = TRUE,
#'                                         control = list(maxit = 10000))
#'summary(copula_additive_model)
#'
#'
#'

copula_additive <- function(data, var_list,
                             l1=0, u1, m1 = 3,
                             l2=0, u2, m2 = 3,
                             method = "Newton", iter=1000, stepsize=1e-5,
                             hes = TRUE, control = list(), eta_ini = NULL){

  copula = "Copula2"
  data <- data_sim_copula_scmprisk_addhaz_tvc_longclean(data, var_list)
  weight <- data$weight

  data_processed <- data_process_scmprisk_ic_sp_tvc(data, var_list, l1, u1, m1, l2, u2, m2)
  indata1 <- data_processed$indata1
  indata2 <- data_processed$indata2
  t1_left <- data_processed$t1_left
  t1_right <- data_processed$t1_right
  t2 <- data_processed$t2
  n <- data_processed$n
  p <- data_processed$p
  x1_left <- data_processed$x1_left
  x1_right <- data_processed$x1_right
  x2_timeD <- data_processed$x2_timeD
  var_list <- data_processed$var_list
  var_list_new <- data_processed$var_list_new
  var_list_exact <- data_processed$var_list_exact
  var_list_change <- data_processed$var_list_change

  p1 <- dim(x1_left)[2]
  p2 <- dim(x2_timeD)[2]/2

  # BP
  bl1 <- data_processed$bl1
  br1 <- data_processed$br1
  b2 <- data_processed$b2

  # BP derivatives
  bl1_d <- data_processed$bl1_d
  br1_d <- data_processed$br1_d
  b2_d <- data_processed$b2_d


  # decide weight #
  if (is.null(weight)) {
    weight = rep(1, n)
  }

  ###################################
  ############ Step 1a ##############
  ###################################

  ###### obtain consistent estimator for timeD under RC and LT (in fact, LT is not considered due to computational issues)#######
  x2_1a <- data.frame(timeD = c(indata2$timeD),
                      statusD = c(indata2$statusD),
                      x2_timeD)

  if (method == "Newton") {

    model_step1a <- nlm(estimate_sieve_step1a_scmprisk_rc_addhaz_tvc, rep(0.01, (p2+m2+1)), hessian = F,
                        iterlim = iter, steptol = stepsize,
                        p2=p2, m2 = m2, x2_timeD = x2_timeD, b2 = b2,  b2_d = b2_d,
                        indata2 = x2_1a, weight = weight)

    beta_ini_2 <- model_step1a$estimate[1:p2]
    phi_ini_2 <- model_step1a$estimate[(p2+1):(p2+1+m2)]

  }

  if (method == "constrOptim" | method == "combined") {

    # constrained optimization: Lambda(t) is already positive; xbeta is constrained postive (all factors are risk factors)
    model_step1a <- constrOptim(theta = rep(0.2, (p2+m2+1)), f = estimate_sieve_step1a_scmprisk_rc_addhaz_tvc,
                                ui = cbind(x2_timeD[,(p2+1):(p2+p2)],matrix(0, nrow = nrow(x2_timeD), ncol = m2+1)),
                                ci = rep(-1e-10, nrow(x2_timeD)), method = "Nelder-Mead", hessian = F,
                                p2 = p2, m2 = m2, x2_timeD = x2_timeD,
                                b2 = b2, b2_d = b2_d, indata2 = x2_1a,
                                weight = weight,
                                control = control)

    sum(cbind(x2_timeD[,(p2+1):(p2+p2)],b2_d) %*% matrix(c(model_step1a$par[1],cumsum(exp(model_step1a$par[-1]))),ncol = 1)<=0)
    sum(cbind(x2_timeD[,(p2+1):(p2+p2)],b2) %*% matrix(c(model_step1a$par[1],cumsum(exp(model_step1a$par[-1]))),ncol = 1)<=0)

    beta_ini_2 <- model_step1a$par[1:p2]
    phi_ini_2 <- model_step1a$par[(p2+1):(p2+1+m2)]

  }



  ###### obtain initial (inconsistent) estimator for event1 under IC and LT #######
  x1_1a <- data.frame(Left = c(indata1$Left),
                      Right = c(indata1$Right),
                      status = indata1$status,
                      (x1_left),x1_right)

  if (method == "Newton") {
    model_step1a <- nlm(estimate_sieve_step1a_scmprisk_ic_addhaz_tvc, rep(0.1, (p1+m1+1)), hessian = FALSE,
                        iterlim = iter, steptol = stepsize,
                        p1 = p1, m1 = m1, x1_left = x1_left, x1_right = x1_right,
                        bl1 = bl1, br1 = br1,
                        indata1 = x1_1a, weight = weight)

    beta_ini_1 <- model_step1a$estimate[1:p1]
    phi_ini_1 <- model_step1a$estimate[(p1+1):(p1+1+m1)]
  }


  if (method == "constrOptim" | method == "combined") {

    # constrained optimization: Lambda(t) is already positive; xbeta is constrained postive (all factors are risk factors)
    model_step1a <- constrOptim(theta = rep(0.2, (p1+m1+1)), f = estimate_sieve_step1a_scmprisk_ic_addhaz_tvc,
                                ui = cbind(x1_left, matrix(0, nrow = nrow(x1_left), ncol = m1+1)),
                                ci = rep(-1e-10, nrow(x1_left)), method = "Nelder-Mead", hessian = F,
                                p1 = p1, m1 = m1, x1_left = x1_left, x1_right = x1_right, indata1 = x1_1a,
                                bl1 = bl1, br1 = br1,
                                weight = weight,
                                control = control)

    sum(cbind(x1_left,bl1) %*% matrix(c(model_step1a$par[1],cumsum(exp(model_step1a$par[-1]))),ncol = 1)<=0)
    sum(cbind(x1_right,br1) %*% matrix(c(model_step1a$par[1],cumsum(exp(model_step1a$par[-1]))),ncol = 1)<=0)

    beta_ini_1 <- model_step1a$par[1:p1]
    phi_ini_1 <- model_step1a$par[(p1+1):(p1+1+m1)]

  }

  ###################################
  ############ Step 1b ##############
  ###################################

  if (is.null(eta_ini)) {
    if (copula == "AMH") {
      eta_ini <- 1
    }

    else if (copula == "Copula2") {
      eta_ini <- c(log(0.5/0.5), log(1))
    }

    else {
      eta_ini <- 1
    }
  }


  if (method == "Newton") {

    fit0 <- nlm(ic_scmprisk_copula_log_lik_sieve_pseudo_addhaz_copula2_tvc, p = c(eta_ini, phi_ini_1, beta_ini_1),
                fitted = c(phi_ini_2,beta_ini_2),
                x1_left = x1_left, x1_right = x1_right, x2_timeD = x2_timeD, t1_left = t1_left, t1_right = t1_right, t2=t2, indata1 = indata1,indata2 = indata2,
                bl1 = bl1, br1 = br1, b2 = b2, b2_d = b2_d, m1 = m1, m2 = m2, p1 = p1, p2 = p2,
                iterlim = iter, steptol = stepsize, copula = copula, weight = weight)

    if (copula == "Copula2") {
      p_ini <- c((fit0$estimate[1]), (fit0$estimate[2]), fit0$estimate[3:length(fit0$estimate)]) # anti-log
    } else {
      p_ini <- c((fit0$estimate[1]), fit0$estimate[2:length(fit0$estimate)]) # anti-log
    }


  }

  if (method == "constrOptim"| method == "combined") {

    fit0 <- constrOptim(theta = c(eta_ini, phi_ini_1, beta_ini_1), f = ic_scmprisk_copula_log_lik_sieve_pseudo_addhaz_copula2_tvc,
                        fitted = c(phi_ini_2,beta_ini_2),
                        ui = cbind(matrix(0, nrow = nrow(x1_left), ncol = length(eta_ini)+m1+1),x1_left),
                        ci = rep(-1e-10, nrow(x1_left)), method = "Nelder-Mead", hessian = F,
                        x1_left = x1_left, x1_right = x1_right, x2_timeD = x2_timeD,
                        t1_left = t1_left, t1_right = t1_right, t2=t2, indata1 = indata1,indata2 = indata2,
                        bl1 = bl1, br1 = br1, b2 = b2, b2_d = b2_d, m1 = m1, m2 = m2, p1 = p1, p2 = p2,
                        copula = copula, weight = weight,
                        control = control)

    if (copula == "Copula2") {
      p_ini <- c((fit0$par[1]), (fit0$par[2]), fit0$par[3:length(fit0$par)]) # anti-log
    } else {
      p_ini <- c((fit0$par[1]), fit0$par[2:length(fit0$par)]) # anti-log
    }

  }


  ###################################
  ############ Step 2 ###############
  ###################################
  if (method == "Newton" |  method == "combined") {

    # alpha = expit(alpha0), kappa = exp(alpha0)
    model_step2 <- nlm(ic_scmprisk_copula_log_lik_sieve_addhaz_copula2_tvc, p = c(p_ini, phi_ini_2, beta_ini_2), # eta, margin1,margin2
                       x1_left = x1_left, x1_right = x1_right, x2_timeD = x2_timeD,
                       t1_left = t1_left, t1_right = t1_right, t2=t2, indata1 = indata1,indata2 = indata2,
                       bl1 = bl1, br1 = br1, b2 = b2, b2_d = b2_d, m1 = m1, m2 = m2, p1 = p1, p2 = p2,
                       iterlim = iter, steptol = stepsize, hessian = T, weight = weight,
                       copula = copula)


    inv_info <- pseudoinverse(model_step2$hessian)
    dih <- diag(inv_info)
    dih[dih < 0] <- 0
    se <- sqrt(dih)
    beta <- if (copula != "Copula2") c((model_step2$estimate[1]), model_step2$estimate[c((1+1+m1+1):(2+m1+p1), (2+m1+p1+1+m2+1):(2+m1+p1+1+m2+p2))]) else c(exp((model_step2$estimate[1]))/(1+exp(model_step2$estimate[1])), exp(model_step2$estimate[2]), model_step2$estimate[c((2+1+m1+1):(2+1+m1+p1), (2+1+m1+p1+1+m2+1):(2+1+m1+p1+1+m2+p2))])
    se <- if (copula != "Copula2") c(se[1], se[c((1+1+m1+1):(2+m1+p1), (2+m1+p1+1+m2+1):(2+m1+p1+1+m2+p2))]) else c(se[1]*exp(model_step2$estimate[1])/(1+exp(model_step2$estimate[1]))^2, se[2]*exp(model_step2$estimate[2]), se[c((2+1+m1+1):(2+1+m1+p1), (2+1+m1+p1+1+m2+1):(2+1+m1+p1+1+m2+p2))])
    llk <- -1 * model_step2$minimum
    AIC <- 2 * length(model_step2$estimate) - 2 * llk
    stat <- (beta - 0)^2/se^2
    pvalue <- pchisq(stat,1,lower.tail=F)
    summary <- cbind(beta, se, stat, pvalue)

    tmp_name2 <- if (copula != "Copula2") c("eta") else c("alpha","kappa")
    rownames(summary) = c(tmp_name2, paste0(c(var_list),"_1"), paste0(c(var_list),"_2"))
    colnames(summary) <- c("estimate","SE","stat","pvalue")
    code <- model_step2$code
    output <- list(code = code, data=data, u= u1 ,summary = summary, llk = llk, AIC = AIC,
                   copula = copula, indata1 = indata1, weight=weight,
                   indata2 = indata2, var_list = var_list,
                   estimates = model_step2$estimate, x1_left = x1_left, x1_right = x1_right, x2_timeD = x2_timeD,
                   inv_info = inv_info,
                   bl1 = bl1, br1 = br1, b2 = b2, b2_d = b2_d,m=min(m1,m2),
                   m1 = m1, m2 = m2, p1 = p1, p2 = p2, data = data,
                   beta_ini_1=beta_ini_1, phi_ini_1=phi_ini_1,
                   beta_ini_2=beta_ini_2, phi_ini_2=phi_ini_2)

  }

  class(output) <- "LongDecompHE"
  return(output)
}
