# Function arguments

# data: Data used to fit the MELS model. It needs to be consistent with what the modeling algorithm actually use. For instance, if all observations with missing value(s) are dropped when fitting the models, they should also be taken out here. 
# slope: 1 if the model includes random slopes, 0 if the model includes only random intercepts (of which variance can be influenced by covariates)
# subject: Column name/number of the variable that indicates subjects' ID's  
# fixed_covs: Vector/scalar of column name(s)/number(s) of fixed location effect covariates
# rand_covs: Vector/scalar of column name(s)/number(s) of random slope covariates; include only if rand_slope = 1
# bs_covs: Vector/scalar of column name(s)/number(s) of covariates for random intercept variance; include only if rand_slope = 0  
# ws_covs: Vector/scalar of column name(s)/numbers(s) of covariates for WS variances
# beta0: Estimate of fixed intercept  
# beta: Vector/scalar of estimate(s) of fixed slope(s) 
# alpha0: Estimate of fixed intercept in the model predicting variance of the random intercept; include only if rand_slope = 0  
# alpha: Vector/scalar of estimate(s) of fixed slope(s) in the model predicting variance of the random intercept; include only if rand_slope = 0  
# sigma_v: Estimate of the variance-covariance matrix of random intercept and random slopes; include only if rand_slope = 1  
# tau0: Estimate of the fixed intercept in the scale model 
# tau: Vector/scalar of estimate(s) of the fixed slope(s) in the scale model 
# sigma_omega: Estimate of the standard deviation of the random scale effect

# Function code

library(MASS)
library(tidyverse)
library(nloptr)
library(rockchalk)
library(gtable)
library(grid)
library(gridExtra)

r2MELS <- function(data = NULL, rand_slope=0, subject = NULL, 
                   fixed_covs = NULL, rand_covs = NULL, bs_covs = NULL, 
                   ws_covs = NULL, beta0 = NULL, 
                   beta = NULL, alpha0 = NULL, alpha = NULL, sigma_v = NULL, 
                   tau0 = NULL, tau = NULL, sigma_omega = NULL){
  # Part I: Variance Partitioning
  # Calculation of f1, f2, and e (e0, e1, e2, d) is the same 
  # for the two kinds of models
  if(!is.null(fixed_covs)){
    centered_fixed_covs <-  gmc(data,fixed_covs,subject)
    
    # Make a new dataset with only the first observation of each subject
    # for the calculation of phi_x_b
    # To accommodate unbalanced design 
    # (different numbers of observations for each subject)
    
    fixed_1obs <- centered_fixed_covs %>% 
      group_by_at(subject) %>% 
      filter(row_number()==1)
    
    phi_x_w <- var(centered_fixed_covs[,paste0(fixed_covs, "_dev")])
    phi_x_b <- var(fixed_1obs[,paste0(fixed_covs, "_mn")])
    f1 <- t(beta) %*% phi_x_w %*% beta
    f2 <- t(beta) %*% phi_x_b %*% beta
  } else{
    f1 <- 0
    f2 <- 0
  }
  
  if(!is.null(ws_covs)){
    centered_ws_covs <-  gmc(data,ws_covs,subject)
    # Make a new dataset with only the first observation of each subject
    # for the calculation of phi_w_b
    # To accommodate unbalanced design 
    # (different numbers of observations for each subject)
    ws_1obs <- centered_ws_covs %>% 
      group_by_at(subject) %>% 
      filter(row_number()==1)
    
    phi_w_w <- var(centered_ws_covs[,paste0(ws_covs, "_dev")])
    phi_w_b <- var(ws_1obs[,paste0(ws_covs, "_mn")])
    mu_w <- colMeans(ws_1obs[,paste0(ws_covs, "_mn")])
    
    e <- exp(tau0 +  t(mu_w) %*% tau + (t(tau) %*% phi_w_w %*% tau + 
                                          t(tau) %*% phi_w_b %*% tau +
                                          sigma_omega^2)/2)
    e0 <- exp(tau0 +  t(mu_w) %*% tau)
    e1 <- t(tau) %*% phi_w_w %*% tau/2
    e2 <- t(tau) %*% phi_w_b %*% tau/2
    d <- (sigma_omega^2)/2 
  } else{
    if (!is.null(sigma_omega)){
      e <- exp(tau0 + sigma_omega^2/2)
      e0 <- exp(tau0)
      e1 <- 0
      e2 <- 0
      d <- (sigma_omega^2)/2
    } else{
      e <- exp(tau0)
      e0 <- exp(tau0)
      e1 <- 0
      e2 <- 0
      d <- 0
    }
  }
  
  # Calculation of variance of the response variable explained by 
  # the random location effects 
  # Different for the two kinds of models
  if(rand_slope == 1){
    # Random location effects are included through random intercepts and 
    # random slopes of covariates in the location model
    if(!is.null(rand_covs)){
      centered_rand_covs <-  gmc(data,rand_covs,subject)
      
      # Make a new dataset with only the first observation of each subject
      # for the calculation of phi_z_b & mu_z 
      # to accommodate unbalanced design 
      # (different numbers of observations for each subject)
      rand_1obs <- centered_rand_covs %>% 
        group_by_at(subject) %>% 
        filter(row_number()==1)
      
      phi_z_w <- var(centered_rand_covs[,paste0(rand_covs, "_dev")])
      phi_z_b <- var(rand_1obs[,paste0(rand_covs, "_mn")])
      v1 <- sum(diag(phi_z_w %*% sigma_v[-1, -1]))
      v2 <- sum(diag(phi_z_b %*% sigma_v[-1, -1]))
      
      mu_z <- c(1, colMeans(rand_1obs[,paste0(rand_covs, "_mn")]))
      m <- t(mu_z) %*% sigma_v %*% mu_z
    } else{
      v1 <- 0
      v2 <- 0
      m <- sigma_v
    }
  } else if(rand_slope == 0){
    # Random location effects are included through random intercepts
    # with covariate-influenced variances
    if(!is.null(bs_covs)){
      centered_bs_covs <-  gmc(data,bs_covs,subject)
      
      # Make a new dataset with only the first observation of each subject
      # for the calculation of phi_u_b & mu_u 
      # to accommodate unbalanced design 
      # (different numbers of observations for each subject)
      bs_1obs <- centered_bs_covs %>% 
        group_by_at(subject) %>% 
        filter(row_number()==1)
      
      phi_u_w <- var(centered_bs_covs[,paste0(bs_covs, "_dev")])
      phi_u_b <- var(bs_1obs[,paste0(bs_covs, "_mn")])
      
      mu_u <- colMeans(bs_1obs[,paste0(bs_covs, "_mn")])
      
      v <- exp(alpha0 + t(mu_u) %*% alpha + 
                 (t(alpha) %*% phi_u_w %*% alpha + 
                    t(alpha) %*% phi_u_b %*% alpha)/2)
      m <- exp(alpha0 + t(mu_u) %*% alpha)
      v1 <- t(alpha) %*% phi_u_w %*% alpha/2
      v2 <- t(alpha) %*% phi_u_b %*% alpha/2 
    } else{
      v <- exp(alpha0)
      m <- exp(alpha0)
      v1 <- 0
      v2 <- 0
    }
  }
  
  # Part II: Calculate the R_squared measures & Plot & Results Table
  # Scale part
  e0_s <- e0/e
  e1_s <- (e - e0) * e1/(e1 + e2 + d)/e
  e2_s <- (e - e0) * e2/(e1 + e2 + d)/e
  d_s <- (e - e0) * d/(e1 + e2 + d)/e
  
  decomp_table_s <- matrix(c(e1_s, e2_s, d_s, e0_s), nrow = 4)
  colnames(decomp_table_s) <- "scale"
  rownames(decomp_table_s) <- c("e1", "e2", "d", "e0")
  
  R2_table_s <- matrix(c(e1_s, e2_s, e1_s+e2_s, d_s), nrow = 4)
  colnames(R2_table_s) <- "scale"
  rownames(R2_table_s) <- c("e1", "e2", "e1e2", "d")
  
  # Location part
  if(rand_slope == 1){
    # Proportion of total variance of the response variable explained
    f1_t <- f1/(f1 + f2 + v1 + v2 + m + e)
    f2_t <- f2/(f1 + f2 + v1 + v2 + m + e)
    f_t <- f1_t + f2_t
    v1_t <- v1/(f1 + f2 + v1 + v2 + m + e)
    v2_t <- v2/(f1 + f2 + v1 + v2 + m + e)
    m_t <- m/(f1 + f2 + v1 + v2 + m + e)
    vm_t <- v1_t + v2_t + m_t
    fvm_t <- f_t + vm_t
    f2v2m_t <- f2_t + v2_t + m_t
    e_t <- e/(f1 + f2 + v1 + v2 + m + e)
    
    # Proportion of BS variance of the response variable explained
    f2_bs <- f2/(f2 + v2 + m)
    v2_bs <- v2/(f2 + v2 + m)
    m_bs <- m/(f2 + v2 + m)
    v2m_bs <- v2_bs + m_bs
    
    # Proportion of WS variance of the response variable explained
    f1_ws <- f1/(f1 + v1 + e)
    v1_ws <- v1/(f1 + v1 + e)
    f1v1_ws <- f1_ws + v1_ws
    e_ws <- e/(f1 + v1 + e)
    
    # Table
    # Variance partitioning
    decomp_table <- matrix(c(f1_t, f2_t, v1_t, v2_t, m_t, e_t, 
                             NA, f2_bs, NA, v2_bs, m_bs, NA,
                             f1_ws, NA, v1_ws, NA, NA, e_ws), nrow = 6)
    colnames(decomp_table) <- c(" total variance |", 
                                " BS variance |", 
                                " WS variance")
    rownames(decomp_table) <- c("f1", "f2", "v1", "v2", "m", "e")
    
    # R2 measure
    R2_table <- matrix(c(f1_t, f2_t, f_t, v1_t, v2_t, m_t, vm_t, fvm_t, 
                         f2v2m_t, 
                         NA, f2_bs, NA, NA, v2_bs, m_bs, NA, NA, NA,
                         f1_ws, NA, NA, v1_ws, NA, NA, NA, NA, NA), 
                       nrow = 9)
    colnames(R2_table) <- c(" total variance |", 
                            " BS variance |", 
                            " WS variance")
    rownames(R2_table) <- c("f1", "f2", "f(f1+f2)", "v1", "v2", "m", 
                            "vm(v1+v2+m)", "fvm(f1+f2+v1+v2+m)", 
                            "BS(f2+v2+m)")
    
    # Plot
    data_plot <- matrix(c(f1_t, f2_t, v1_t, v2_t, m_t, e_t, 
                          0, f2_bs,0, v2_bs, m_bs, 0,
                          f1_ws, 0, v1_ws, 0, 0, e_ws), nrow = 3, ncol = 6, 
                        byrow = TRUE)
    rownames(data_plot) <- c("total variance",
                             "BS variance", 
                             "WS variance")
    
    colnames(data_plot) <- c("f1: fixed location effects (WS)",
                             "f2: fixed location effects (BS)",
                             "v1: random slopes (WS)", 
                             "v2: random slopes (BS)", 
                             "m: random intercepts (BS)", 
                             "e: observation-level residuals (WS)")
    df_plot <- as.data.frame(data_plot)
    df_plot$domain <- rownames(df_plot)
    
    df_plot2 <- df_plot %>% 
      gather(key = "source", value = "proportion", -domain) %>% 
      mutate(source = fct_relevel(source, 
                                  "e: observation-level residuals (WS)", 
                                  "f1: fixed location effects (WS)",
                                  "v1: random slopes (WS)", 
                                  "m: random intercepts (BS)", 
                                  "v2: random slopes (BS)", 
                                  "f2: fixed location effects (BS)"), 
             domain = fct_relevel(domain,"total variance", 
                                  "BS variance",
                                  "WS variance"))
    
    p1 <- ggplot(df_plot2, aes(x = domain, y = proportion, fill = source)) +
      geom_bar(position = "stack", stat = "identity") +
      # Change fill color
      scale_fill_manual(values = c("coral", "brown", "chocolate", "skyblue",
                                   "royalblue4", "dodgerblue")) +
      # Change plot features
      theme(axis.title.x= element_blank(),
            axis.text.x= element_text(angle = 10, vjust = 0.5, hjust = 0.5),
            legend.position="bottom")+
      guides(fill=guide_legend(ncol=1)) 
  } else if(rand_slope == 0){
    # Proportion of total variance of the response variable explained
    f1_t <- f1/(f1 + f2 + v + e)
    f2_t <- f2/(f1 + f2 + v + e)
    f_t <- f1_t + f2_t
    v_t <- v/(f1 + f2 + v + e)
    m_t <- m/(f1 + f2 + v + e)
    v1_t <- (v - m) * v1/(v1 + v2)/(f1 + f2 + v + e)
    v2_t <- (v - m) * v2/(v1 + v2)/(f1 + f2 + v + e)
    fv_t <- f_t + v_t
    f2v_t <- f2_t + v_t
    e_t <- e/(f1 + f2 + v + e)
    
    # Proportion of BS variance of the response variable explained
    f2_bs <- f2/(f2 + v)
    v_bs <- v/(f2 + v)
    m_bs <- m/(f2 + v)
    v1_bs <- (v - m) * v1/(v1 + v2)/(f2 + v)
    v2_bs <- (v - m) * v2/(v1 + v2)/(f2 + v)
    
    # Proportion of WS variance of the response variable explained
    f1_ws <- f1/(f1 + e)
    e_ws <- e/(f1 + e)
    
    # Tables
    # Variance partitioning
    decomp_table <- matrix(c(f1_t, f2_t, v1_t, v2_t, m_t, e_t, 
                             NA, f2_bs, v1_bs, v2_bs, m_bs, NA,
                             f1_ws, NA, NA, NA, NA, e_ws), nrow = 6)
    colnames(decomp_table) <- c(" total variance |", 
                                " BS variance |", 
                                " WS variance")
    rownames(decomp_table) <- c("f1", "f2", "v1", "v2", "m", "e")
    
    # R2 measure
    R2_table <- matrix(c(f1_t, f2_t, f_t, v1_t, v2_t, m_t, v_t, fv_t, f2v_t, 
                         NA, f2_bs, NA, v1_bs, v2_bs, m_bs, v_bs, NA, NA,
                         f1_ws, NA, NA, NA, NA, NA, NA, NA, NA), nrow = 9)
    colnames(R2_table) <- c(" total variance |", 
                            " BS variance |", 
                            " WS variance")
    rownames(R2_table) <- c("f1", "f2", "f(f1+f2)", "v1", "v2", "m",
                            "v", "fv(f1+f2+v)", "BS(f2+v)")
    
    # Plot
    data_plot <- matrix(c(f1_t, f2_t, v1_t, v2_t, m_t, e_t, 
                          0, f2_bs, v1_bs, v2_bs, m_bs, 0,
                          f1_ws, 0,  0, 0, 0 , e_ws), nrow = 3, 
                        ncol = 6, byrow = TRUE)
    rownames(data_plot) <- c("total variance",
                             "BS variance", 
                             "WS variance")
    colnames(data_plot) <- c("f1: fixed location effects (WS)",
                             "f2: fixed location effects (BS)",
                             "v1: random intercepts by WS covariates (BS)",
                             "v2: random intercepts by BS covariates (BS)",
                             "m: random intercepts at mean (BS)",
                             "e: observation-level residuals (WS)")
    df_plot <- as.data.frame(data_plot)
    df_plot$domain <- rownames(df_plot)
    
    df_plot2 <- df_plot %>% 
      gather(key = "source", value = "proportion", -domain) %>% 
      mutate(source = 
               fct_relevel(source, 
                           "e: observation-level residuals (WS)",
                           "f1: fixed location effects (WS)",  
                           "v1: random intercepts by WS covariates (BS)",
                           "v2: random intercepts by BS covariates (BS)",
                           "m: random intercepts at mean (BS)",
                           "f2: fixed location effects (BS)"), 
             domain = fct_relevel(domain,"total variance", 
                                  "BS variance",
                                  "WS variance"))
    
    p1 <- ggplot(df_plot2, aes(x = domain, y = proportion, fill = source)) +
      geom_bar(position = "stack", stat = "identity") +
      # Change fill color
      scale_fill_manual(values = c("coral", "brown" , "royalblue4", 
                                   "gray92", "skyblue", "dodgerblue")) +
      # Change plot features
      theme(axis.title.x= element_blank(),
            axis.text.x= element_text(angle = 10, vjust = 0.5, hjust = 0.5),
            legend.position="bottom")+
      guides(fill=guide_legend(ncol=1))
  }
  
  df_plot_s <- data.frame("domain" = rep("scale", 4), 
                          "source" = c("e0: unexplained", 
                                       "e1: fixed slopes of WS covariates", 
                                       "e2: fixed slopes of BS covariates", 
                                       "d: random scale effects"),
                          "proportion" = c(e0_s, e1_s, e2_s, d_s))
  
  # Plot for the scale part of the model
  df_plot_s <- df_plot_s %>% 
    mutate(source = fct_relevel(source, "e0: unexplained", 
                                "e1: fixed slopes of WS covariates", 
                                "e2: fixed slopes of BS covariates", 
                                "d: random scale effects"))
  
  p_s <- ggplot(df_plot_s) +
    geom_bar(aes(x = domain, y = proportion, fill = source), 
             position = "stack", stat = "identity", width = 0.4) +
    # Change fill color
    scale_fill_manual(values = c("khaki2","seagreen", "seagreen3",  
                                 "darkolivegreen"))  + 
    geom_segment(data = data.frame(
      x = c(0.8, 0.8), 
      xend = c(0, 0),
      y = c(1, 0),
      yend = c(1, 
               1-df_plot["WS variance", 
                         "e: observation-level residuals (WS)"])), 
      aes(x=x,y=y,xend=xend,yend=yend), linetype = "dashed") + 
    # Change plot features
    theme(axis.title.x= element_blank(),
          axis.title.y= element_blank(),
          axis.ticks.y= element_blank(),
          axis.text.y= element_blank(),
          axis.text.x= element_text(angle = 10, vjust = 0.5, hjust = 0.5),
          legend.position="bottom", 
          plot.margin = unit(c(1,1,1,-0.5),"cm")) +
    guides(fill=guide_legend(ncol=1))
  
  # Combine the plot for the location part and the scale part of the model
  p_s_grob <- ggplotGrob(p_s)
  p1_grob <- ggplotGrob(p1)
  p_s_grob$heights <- p1_grob$heights
  p2 <- grid.arrange(p1_grob, p_s_grob, nrow = 1)
  
  output <- list(decomp_table, R2_table, decomp_table_s, 
                 R2_table_s, p2)
  names(output) <- c("Variance Partitioning (location)", 
                     "R2 Measures (location)", 
                     "Variance Partitioning (scale)", 
                     "R2 Measures (scale)", "plot")
  return(output)
}