#' @title Magics
#'
#' @description A main function for conducting causal mediation analysis
#'  with co-mediators derived from zero-inflated single-cell data.
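#' @param data.name A data.frame or matrix with N rows and (2G + K) feature
#'    columns, where N is the number of samples, G is the number of genes (each
#'    gene contributes two features: one for the zero component and one for the
#'    non-zero component), and K is the number of covariates. In addition to
#'    these features, the outcome is read from a column named \code{Y} and the
#'    exposure from a column named \code{X}; gene features are located by the
#'    column-name prefixes \code{M_} and \code{F_}.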
#' @param n_genes An integer value. The number of genes (G) represented
#'   in the data.
#' @param covariate.names A character vector to specify the column name of covariates.
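#' @return A list containing the following elements: (1) estimated coefficients from
#' the outcome and two mediation models (M and F models in methodology paper); (2) standard errors corresponding to (1);
#' (3) named integer vector giving the positions (among the screened genes) of the genes whose mediator
#' component (M model) is statistically significant; (4) named integer vector giving the positions (among the screened genes) of the genes whose zero-inflation
#' component (F model) is statistically significant; (5) adjusted p-values for M and F model (joint significance test).
#' Elements belonging to the M (or F) model are present only when at least one gene of that type passes screening.
#' If no gene passes screening, the character string \code{"No mediator detected."} is returned instead of a list.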
#' @references Ahn S, Li Z. A Statistical Framework for Co-Mediators of Zero-Inflated Single-Cell RNA-Seq Data.
#'      ArXiv. 2025 July 8:arXiv:2507.06113v1.
#'      Available at: \url{https://arxiv.org/pdf/2507.06113}
#'
#' @examples
#' \donttest{
#' data("simulated_data")
#' n_genes = ncol(simulated_data[, grep("^(M_)", colnames(simulated_data))])
#' Magics(data.name = simulated_data, n_genes = n_genes, covariate.names = c("Z1", "Z2", "Z3"))
#' }
#' @import MASS
#' @import betareg
#' @import glmnet
#' @importFrom stats as.formula coef lm p.adjust pnorm
#' @importFrom utils capture.output
#' @export

Magics <- function(data.name, n_genes, covariate.names) {

  ### STEP 0: Normalise and validate the inputs
  # The formula-based fitters used below need a data.frame; a matrix input is
  # converted here, keeping its column names.
  if (!is.data.frame(data.name)) {
    data.name <- as.data.frame(data.name)
  }
  if (!is.numeric(n_genes) || length(n_genes) != 1L || is.na(n_genes) ||
      n_genes < 0) {
    stop("`n_genes` must be a single non-negative number.", call. = FALSE)
  }
  all_cols <- colnames(data.name)
  missing_cols <- setdiff(c("Y", "X"), all_cols)
  if (length(missing_cols) > 0) {
    # Without this check a missing `X` makes every screening fit fail inside
    # tryCatch(), and the function would misleadingly report that no mediator
    # was detected.
    stop("`data.name` is missing required column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }
  if (!is.numeric(data.name[["X"]])) {
    # The coefficient is looked up under the literal name "X", which only
    # exists when X enters the models as a single numeric term.
    stop("Column `X` of `data.name` must be numeric.", call. = FALSE)
  }

  Z_names <- covariate.names
  # Gene feature columns, located once by prefix (names, not sub-data-frames,
  # so a single matching column is handled correctly).
  M_cols <- grep("^M_", all_cols, value = TRUE)
  F_cols <- grep("^F_", all_cols, value = TRUE)
  # Only the first `n_genes` columns of each type are screened.
  M_screen_cols <- M_cols[seq_len(min(n_genes, length(M_cols)))]
  F_screen_cols <- F_cols[seq_len(min(n_genes, length(F_cols)))]

  # Two-sided normal p-value; the upper-tail form stays accurate for large |z|
  # where 1 - pnorm(|z|) would round to exactly zero.
  two_sided_p <- function(z) 2 * pnorm(abs(z), lower.tail = FALSE)

  ### STEP 1: Identify overlapping genes as a screening step
  ### 1.1: Fit the lasso regression outcome model on all M_ and F_ features
  MF_mat <- as.matrix(
    data.name[, grep("^(M_|F_)", all_cols), drop = FALSE]
  )
  Y_vec <- data.name[["Y"]]
  outcome.lasso <- cv.glmnet(MF_mat, Y_vec, alpha = 1, type.measure = "mse")
  coef_lasso <- as.matrix(coef(outcome.lasso, s = "lambda.min"))
  selected_gene_names <- setdiff(
    rownames(coef_lasso)[coef_lasso[, 1] != 0],
    "(Intercept)"
  )

  ### 1.2: Screen M_g and F_g for an association with the exposure X
  # Fits `gene ~ X` for every column with `fit_fun` and keeps the columns whose
  # X p-value (extracted by `pval_fun`) is below 0.05. A failed fit, or an X
  # coefficient that cannot be extracted (e.g. aliased), means "not selected".
  screen_by_exposure <- function(gene_cols, fit_fun, pval_fun) {
    keep <- vapply(gene_cols, function(gene_col) {
      fit <- fit_fun(as.formula(paste(gene_col, "~ X")))
      if (is.null(fit)) {
        return(FALSE)
      }
      p_val <- tryCatch(pval_fun(fit), error = function(e) NA_real_)
      isTRUE(p_val < 0.05)
    }, logical(1))
    gene_cols[keep]
  }

  ## 1.2.1: NB mediation model for M_g (X -> M_g)
  fit_nb_quietly <- function(form) {
    suppressWarnings(
      tryCatch(glm.nb(form, data = data.name), error = function(e) NULL)
    )
  }
  selected_M_gene_names <- screen_by_exposure(
    M_screen_cols,
    fit_nb_quietly,
    function(fit) summary(fit)$coefficients["X", "Pr(>|z|)"]
  )

  ## 1.2.2: Beta mediation model for F_g (X -> F_g)
  fit_beta_quietly <- function(form) {
    fit <- NULL
    # Anything written to the message stream during the fit is captured and
    # discarded; warnings and messages are suppressed as well.
    invisible(
      suppressWarnings(
        suppressMessages(
          capture.output({
            fit <- tryCatch(
              betareg(form, data = data.name),
              error = function(e) NULL
            )
          }, type = "message"))))
    fit
  }
  selected_F_gene_names <- screen_by_exposure(
    F_screen_cols,
    fit_beta_quietly,
    function(fit) summary(fit)$coefficients$mean["X", "Pr(>|z|)"]
  )

  ## Identify overlapping genes
  strip_prefix <- function(x) sub("^[^_]*_", "", x)
  Y_indices <- unique(strip_prefix(selected_gene_names))
  M_indices <- strip_prefix(selected_M_gene_names)
  F_indices <- strip_prefix(selected_F_gene_names)

  # NOTE(review): this is a union, so membership of the two sets below is
  # decided by the exposure screening alone; the lasso selection only affects
  # the ordering. Confirm against the methodology whether an intersection with
  # Y_indices was intended.
  common_indices <- Reduce(union, list(Y_indices, M_indices, F_indices))
  M_included_genes <- common_indices[common_indices %in% M_indices]
  F_included_genes <- common_indices[common_indices %in% F_indices]

  ### STEP 2: Fit the final model (including genes from all models with X and Z)
  M_names <- if (length(M_included_genes) > 0) {
    paste0("M_", M_included_genes)
  } else {
    character(0)
  }
  F_names <- if (length(F_included_genes) > 0) {
    paste0("F_", F_included_genes)
  } else {
    character(0)
  }
  has_M <- length(M_names) > 0
  has_F <- length(F_names) > 0

  if (!has_M && !has_F) {
    return("No mediator detected.")
  }

  predictors <- c("X", Z_names, M_names, F_names)
  final_formula <- as.formula(
    paste("Y ~", paste(predictors, collapse = " + "))
  )
  fit_final_model <- lm(final_formula, data = data.name)
  coef_outcome <- coef(fit_final_model)
  coef_tab <- coef(summary(fit_final_model))
  se_outcome <- coef_tab[, "Std. Error"]
  # Estimates and standard errors are both taken from the summary table so they
  # stay aligned: coef() keeps aliased (NA) coefficients while the summary
  # table drops them, and dividing one by the other would recycle and mislabel
  # the p-values.
  pvals_final <- two_sided_p(coef_tab[, "Estimate"] / coef_tab[, "Std. Error"])
  names(pvals_final) <- rownames(coef_tab)

  ### Joint Significance Testing
  # Right-hand side shared by all mediator models; built from a vector so an
  # empty covariate set still gives a valid formula ("gene ~ X").
  mediator_rhs <- paste(c("X", Z_names), collapse = " + ")

  M_tests <- lapply(M_names, function(m_var) {
    formula_Mg <- as.formula(paste(m_var, "~", mediator_rhs))
    fit_Mg <- glm.nb(formula_Mg, data = data.name)
    gamma_coef <- coef(fit_Mg)["X"]
    gamma_se <- coef(summary(fit_Mg))["X", "Std. Error"]
    p_gamma_Xg <- two_sided_p(gamma_coef / gamma_se)
    # pvals_final[m_var] is NA when the outcome model could not estimate this
    # term; na.rm = TRUE then leaves the mediator-model p-value alone.
    p_beta_Mg <- pvals_final[m_var]
    list(
      coef = gamma_coef,
      se = gamma_se,
      p = max(p_gamma_Xg, p_beta_Mg, na.rm = TRUE)
    )
  })

  F_tests <- lapply(F_names, function(f_var) {
    formula_Fg <- as.formula(paste(f_var, "~", mediator_rhs))
    fit_Fg <- betareg(formula_Fg, data = data.name)
    mean_tab <- summary(fit_Fg)$coefficients$mean
    alpha_coef <- mean_tab["X", "Estimate"]
    alpha_se <- mean_tab["X", "Std. Error"]
    p_alpha_Xg <- two_sided_p(alpha_coef / alpha_se)
    p_beta_Fg <- pvals_final[f_var]
    list(
      coef = alpha_coef,
      se = alpha_se,
      p = max(p_alpha_Xg, p_beta_Fg, na.rm = TRUE)
    )
  })

  # unlist() of an empty list is NULL, matching the "nothing to report" state.
  pull <- function(tests, field) unlist(lapply(tests, `[[`, field))
  gamma_coef_vec <- pull(M_tests, "coef")
  gamma_se_vec <- pull(M_tests, "se")
  joint_pvals_M <- pull(M_tests, "p")
  alpha_coef_vec <- pull(F_tests, "coef")
  alpha_se_vec <- pull(F_tests, "se")
  joint_pvals_F <- pull(F_tests, "p")

  if (has_M) {
    names(joint_pvals_M) <- M_names
  }
  if (has_F) {
    names(joint_pvals_F) <- F_names
  }

  fdr_M <- p.adjust(joint_pvals_M, method = "BH")
  fdr_F <- p.adjust(joint_pvals_F, method = "BH")
  signif_M <- which(fdr_M < 0.05)
  signif_F <- which(fdr_F < 0.05)

  # Assemble the result; M and F elements are included only when genes of that
  # type were tested. NULL pieces are dropped by c().
  c(
    list(coef_outcome = coef_outcome, se_outcome = se_outcome),
    if (has_M) list(gamma_coef_vec = gamma_coef_vec,
                    gamma_se_vec = gamma_se_vec),
    if (has_F) list(alpha_coef_vec = alpha_coef_vec,
                    alpha_se_vec = alpha_se_vec),
    if (has_M) list(signif_M = signif_M),
    if (has_F) list(signif_F = signif_F),
    if (has_M) list(adj_pvals_M = fdr_M),
    if (has_F) list(adj_pvals_F = fdr_F)
  )
}
