# Function arguments # data: Data used to fit the MELS model. It needs to be consistent with what the modeling algorithm actually use. For instance, if all observations with missing value(s) are dropped when fitting the models, they should also be taken out here. # slope: 1 if the model includes random slopes, 0 if the model includes only random intercepts (of which variance can be influenced by covariates) # subject: Column name/number of the variable that indicates subjects' ID's # fixed_covs: Vector/scalar of column name(s)/number(s) of fixed location effect covariates # rand_covs: Vector/scalar of column name(s)/number(s) of random slope covariates; include only if rand_slope = 1 # bs_covs: Vector/scalar of column name(s)/number(s) of covariates for random intercept variance; include only if rand_slope = 0 # ws_covs: Vector/scalar of column name(s)/numbers(s) of covariates for WS variances # beta0: Estimate of fixed intercept # beta: Vector/scalar of estimate(s) of fixed slope(s) # alpha0: Estimate of fixed intercept in the model predicting variance of the random intercept; include only if rand_slope = 0 # alpha: Vector/scalar of estimate(s) of fixed slope(s) in the model predicting variance of the random intercept; include only if rand_slope = 0 # sigma_v: Estimate of the variance-covariance matrix of random intercept and random slopes; include only if rand_slope = 1 # tau0: Estimate of the fixed intercept in the scale model # tau: Vector/scalar of estimate(s) of the fixed slope(s) in the scale model # sigma_omega: Estimate of the standard deviation of the random scale effect # Function code library(MASS) library(tidyverse) library(nloptr) library(rockchalk) library(gtable) library(grid) library(gridExtra) r2MELS <- function(data = NULL, rand_slope=0, subject = NULL, fixed_covs = NULL, rand_covs = NULL, bs_covs = NULL, ws_covs = NULL, beta0 = NULL, beta = NULL, alpha0 = NULL, alpha = NULL, sigma_v = NULL, tau0 = NULL, tau = NULL, sigma_omega = NULL){ # Part I: Variance Partitioning # Calculation of f1, f2, and e (e0, e1, e2, d) is the same # for the two kinds of models if(!is.null(fixed_covs)){ centered_fixed_covs <- gmc(data,fixed_covs,subject) # Make a new dataset with only the first observation of each subject # for the calculation of phi_x_b # To accommodate unbalanced design # (different numbers of observations for each subject) fixed_1obs <- centered_fixed_covs %>% group_by_at(subject) %>% filter(row_number()==1) phi_x_w <- var(centered_fixed_covs[,paste0(fixed_covs, "_dev")]) phi_x_b <- var(fixed_1obs[,paste0(fixed_covs, "_mn")]) f1 <- t(beta) %*% phi_x_w %*% beta f2 <- t(beta) %*% phi_x_b %*% beta } else{ f1 <- 0 f2 <- 0 } if(!is.null(ws_covs)){ centered_ws_covs <- gmc(data,ws_covs,subject) # Make a new dataset with only the first observation of each subject # for the calculation of phi_w_b # To accommodate unbalanced design # (different numbers of observations for each subject) ws_1obs <- centered_ws_covs %>% group_by_at(subject) %>% filter(row_number()==1) phi_w_w <- var(centered_ws_covs[,paste0(ws_covs, "_dev")]) phi_w_b <- var(ws_1obs[,paste0(ws_covs, "_mn")]) mu_w <- colMeans(ws_1obs[,paste0(ws_covs, "_mn")]) e <- exp(tau0 + t(mu_w) %*% tau + (t(tau) %*% phi_w_w %*% tau + t(tau) %*% phi_w_b %*% tau + sigma_omega^2)/2) e0 <- exp(tau0 + t(mu_w) %*% tau) e1 <- t(tau) %*% phi_w_w %*% tau/2 e2 <- t(tau) %*% phi_w_b %*% tau/2 d <- (sigma_omega^2)/2 } else{ if (!is.null(sigma_omega)){ e <- exp(tau0 + sigma_omega^2/2) e0 <- exp(tau0) e1 <- 0 e2 <- 0 d <- (sigma_omega^2)/2 } else{ e <- exp(tau0) e0 <- exp(tau0) e1 <- 0 e2 <- 0 d <- 0 } } # Calculation of variance of the response variable explained by # the random location effects # Different for the two kinds of models if(rand_slope == 1){ # Random location effects are included through random intercepts and # random slopes of covariates in the location model if(!is.null(rand_covs)){ centered_rand_covs <- gmc(data,rand_covs,subject) # Make a new dataset with only the first observation of each subject # for the calculation of phi_z_b & mu_z # to accommodate unbalanced design # (different numbers of observations for each subject) rand_1obs <- centered_rand_covs %>% group_by_at(subject) %>% filter(row_number()==1) phi_z_w <- var(centered_rand_covs[,paste0(rand_covs, "_dev")]) phi_z_b <- var(rand_1obs[,paste0(rand_covs, "_mn")]) v1 <- sum(diag(phi_z_w %*% sigma_v[-1, -1])) v2 <- sum(diag(phi_z_b %*% sigma_v[-1, -1])) mu_z <- c(1, colMeans(rand_1obs[,paste0(rand_covs, "_mn")])) m <- t(mu_z) %*% sigma_v %*% mu_z } else{ v1 <- 0 v2 <- 0 m <- sigma_v } } else if(rand_slope == 0){ # Random location effects are included through random intercepts # with covariate-influenced variances if(!is.null(bs_covs)){ centered_bs_covs <- gmc(data,bs_covs,subject) # Make a new dataset with only the first observation of each subject # for the calculation of phi_u_b & mu_u # to accommodate unbalanced design # (different numbers of observations for each subject) bs_1obs <- centered_bs_covs %>% group_by_at(subject) %>% filter(row_number()==1) phi_u_w <- var(centered_bs_covs[,paste0(bs_covs, "_dev")]) phi_u_b <- var(bs_1obs[,paste0(bs_covs, "_mn")]) mu_u <- colMeans(bs_1obs[,paste0(bs_covs, "_mn")]) v <- exp(alpha0 + t(mu_u) %*% alpha + (t(alpha) %*% phi_u_w %*% alpha + t(alpha) %*% phi_u_b %*% alpha)/2) m <- exp(alpha0 + t(mu_u) %*% alpha) v1 <- t(alpha) %*% phi_u_w %*% alpha/2 v2 <- t(alpha) %*% phi_u_b %*% alpha/2 } else{ v <- exp(alpha0) m <- exp(alpha0) v1 <- 0 v2 <- 0 } } # Part II: Calculate the R_squared measures & Plot & Results Table # Scale part e0_s <- e0/e e1_s <- (e - e0) * e1/(e1 + e2 + d)/e e2_s <- (e - e0) * e2/(e1 + e2 + d)/e d_s <- (e - e0) * d/(e1 + e2 + d)/e decomp_table_s <- matrix(c(e1_s, e2_s, d_s, e0_s), nrow = 4) colnames(decomp_table_s) <- "scale" rownames(decomp_table_s) <- c("e1", "e2", "d", "e0") R2_table_s <- matrix(c(e1_s, e2_s, e1_s+e2_s, d_s), nrow = 4) colnames(R2_table_s) <- "scale" rownames(R2_table_s) <- c("e1", "e2", "e1e2", "d") # Location part if(rand_slope == 1){ # Proportion of total variance of the response variable explained f1_t <- f1/(f1 + f2 + v1 + v2 + m + e) f2_t <- f2/(f1 + f2 + v1 + v2 + m + e) f_t <- f1_t + f2_t v1_t <- v1/(f1 + f2 + v1 + v2 + m + e) v2_t <- v2/(f1 + f2 + v1 + v2 + m + e) m_t <- m/(f1 + f2 + v1 + v2 + m + e) vm_t <- v1_t + v2_t + m_t fvm_t <- f_t + vm_t f2v2m_t <- f2_t + v2_t + m_t e_t <- e/(f1 + f2 + v1 + v2 + m + e) # Proportion of BS variance of the response variable explained f2_bs <- f2/(f2 + v2 + m) v2_bs <- v2/(f2 + v2 + m) m_bs <- m/(f2 + v2 + m) v2m_bs <- v2_bs + m_bs # Proportion of WS variance of the response variable explained f1_ws <- f1/(f1 + v1 + e) v1_ws <- v1/(f1 + v1 + e) f1v1_ws <- f1_ws + v1_ws e_ws <- e/(f1 + v1 + e) # Table # Variance partitioning decomp_table <- matrix(c(f1_t, f2_t, v1_t, v2_t, m_t, e_t, NA, f2_bs, NA, v2_bs, m_bs, NA, f1_ws, NA, v1_ws, NA, NA, e_ws), nrow = 6) colnames(decomp_table) <- c(" total variance |", " BS variance |", " WS variance") rownames(decomp_table) <- c("f1", "f2", "v1", "v2", "m", "e") # R2 measure R2_table <- matrix(c(f1_t, f2_t, f_t, v1_t, v2_t, m_t, vm_t, fvm_t, f2v2m_t, NA, f2_bs, NA, NA, v2_bs, m_bs, NA, NA, NA, f1_ws, NA, NA, v1_ws, NA, NA, NA, NA, NA), nrow = 9) colnames(R2_table) <- c(" total variance |", " BS variance |", " WS variance") rownames(R2_table) <- c("f1", "f2", "f(f1+f2)", "v1", "v2", "m", "vm(v1+v2+m)", "fvm(f1+f2+v1+v2+m)", "BS(f2+v2+m)") # Plot data_plot <- matrix(c(f1_t, f2_t, v1_t, v2_t, m_t, e_t, 0, f2_bs,0, v2_bs, m_bs, 0, f1_ws, 0, v1_ws, 0, 0, e_ws), nrow = 3, ncol = 6, byrow = TRUE) rownames(data_plot) <- c("total variance", "BS variance", "WS variance") colnames(data_plot) <- c("f1: fixed location effects (WS)", "f2: fixed location effects (BS)", "v1: random slopes (WS)", "v2: random slopes (BS)", "m: random intercepts (BS)", "e: observation-level residuals (WS)") df_plot <- as.data.frame(data_plot) df_plot$domain <- rownames(df_plot) df_plot2 <- df_plot %>% gather(key = "source", value = "proportion", -domain) %>% mutate(source = fct_relevel(source, "e: observation-level residuals (WS)", "f1: fixed location effects (WS)", "v1: random slopes (WS)", "m: random intercepts (BS)", "v2: random slopes (BS)", "f2: fixed location effects (BS)"), domain = fct_relevel(domain,"total variance", "BS variance", "WS variance")) p1 <- ggplot(df_plot2, aes(x = domain, y = proportion, fill = source)) + geom_bar(position = "stack", stat = "identity") + # Change fill color scale_fill_manual(values = c("coral", "brown", "chocolate", "skyblue", "royalblue4", "dodgerblue")) + # Change plot features theme(axis.title.x= element_blank(), axis.text.x= element_text(angle = 10, vjust = 0.5, hjust = 0.5), legend.position="bottom")+ guides(fill=guide_legend(ncol=1)) } else if(rand_slope == 0){ # Proportion of total variance of the response variable explained f1_t <- f1/(f1 + f2 + v + e) f2_t <- f2/(f1 + f2 + v + e) f_t <- f1_t + f2_t v_t <- v/(f1 + f2 + v + e) m_t <- m/(f1 + f2 + v + e) v1_t <- (v - m) * v1/(v1 + v2)/(f1 + f2 + v + e) v2_t <- (v - m) * v2/(v1 + v2)/(f1 + f2 + v + e) fv_t <- f_t + v_t f2v_t <- f2_t + v_t e_t <- e/(f1 + f2 + v + e) # Proportion of BS variance of the response variable explained f2_bs <- f2/(f2 + v) v_bs <- v/(f2 + v) m_bs <- m/(f2 + v) v1_bs <- (v - m) * v1/(v1 + v2)/(f2 + v) v2_bs <- (v - m) * v2/(v1 + v2)/(f2 + v) # Proportion of WS variance of the response variable explained f1_ws <- f1/(f1 + e) e_ws <- e/(f1 + e) # Tables # Variance partitioning decomp_table <- matrix(c(f1_t, f2_t, v1_t, v2_t, m_t, e_t, NA, f2_bs, v1_bs, v2_bs, m_bs, NA, f1_ws, NA, NA, NA, NA, e_ws), nrow = 6) colnames(decomp_table) <- c(" total variance |", " BS variance |", " WS variance") rownames(decomp_table) <- c("f1", "f2", "v1", "v2", "m", "e") # R2 measure R2_table <- matrix(c(f1_t, f2_t, f_t, v1_t, v2_t, m_t, v_t, fv_t, f2v_t, NA, f2_bs, NA, v1_bs, v2_bs, m_bs, v_bs, NA, NA, f1_ws, NA, NA, NA, NA, NA, NA, NA, NA), nrow = 9) colnames(R2_table) <- c(" total variance |", " BS variance |", " WS variance") rownames(R2_table) <- c("f1", "f2", "f(f1+f2)", "v1", "v2", "m", "v", "fv(f1+f2+v)", "BS(f2+v)") # Plot data_plot <- matrix(c(f1_t, f2_t, v1_t, v2_t, m_t, e_t, 0, f2_bs, v1_bs, v2_bs, m_bs, 0, f1_ws, 0, 0, 0, 0 , e_ws), nrow = 3, ncol = 6, byrow = TRUE) rownames(data_plot) <- c("total variance", "BS variance", "WS variance") colnames(data_plot) <- c("f1: fixed location effects (WS)", "f2: fixed location effects (BS)", "v1: random intercepts by WS covariates (BS)", "v2: random intercepts by BS covariates (BS)", "m: random intercepts at mean (BS)", "e: observation-level residuals (WS)") df_plot <- as.data.frame(data_plot) df_plot$domain <- rownames(df_plot) df_plot2 <- df_plot %>% gather(key = "source", value = "proportion", -domain) %>% mutate(source = fct_relevel(source, "e: observation-level residuals (WS)", "f1: fixed location effects (WS)", "v1: random intercepts by WS covariates (BS)", "v2: random intercepts by BS covariates (BS)", "m: random intercepts at mean (BS)", "f2: fixed location effects (BS)"), domain = fct_relevel(domain,"total variance", "BS variance", "WS variance")) p1 <- ggplot(df_plot2, aes(x = domain, y = proportion, fill = source)) + geom_bar(position = "stack", stat = "identity") + # Change fill color scale_fill_manual(values = c("coral", "brown" , "royalblue4", "gray92", "skyblue", "dodgerblue")) + # Change plot features theme(axis.title.x= element_blank(), axis.text.x= element_text(angle = 10, vjust = 0.5, hjust = 0.5), legend.position="bottom")+ guides(fill=guide_legend(ncol=1)) } df_plot_s <- data.frame("domain" = rep("scale", 4), "source" = c("e0: unexplained", "e1: fixed slopes of WS covariates", "e2: fixed slopes of BS covariates", "d: random scale effects"), "proportion" = c(e0_s, e1_s, e2_s, d_s)) # Plot for the scale part of the model df_plot_s <- df_plot_s %>% mutate(source = fct_relevel(source, "e0: unexplained", "e1: fixed slopes of WS covariates", "e2: fixed slopes of BS covariates", "d: random scale effects")) p_s <- ggplot(df_plot_s) + geom_bar(aes(x = domain, y = proportion, fill = source), position = "stack", stat = "identity", width = 0.4) + # Change fill color scale_fill_manual(values = c("khaki2","seagreen", "seagreen3", "darkolivegreen")) + geom_segment(data = data.frame( x = c(0.8, 0.8), xend = c(0, 0), y = c(1, 0), yend = c(1, 1-df_plot["WS variance", "e: observation-level residuals (WS)"])), aes(x=x,y=y,xend=xend,yend=yend), linetype = "dashed") + # Change plot features theme(axis.title.x= element_blank(), axis.title.y= element_blank(), axis.ticks.y= element_blank(), axis.text.y= element_blank(), axis.text.x= element_text(angle = 10, vjust = 0.5, hjust = 0.5), legend.position="bottom", plot.margin = unit(c(1,1,1,-0.5),"cm")) + guides(fill=guide_legend(ncol=1)) # Combine the plot for the location part and the scale part of the model p_s_grob <- ggplotGrob(p_s) p1_grob <- ggplotGrob(p1) p_s_grob$heights <- p1_grob$heights p2 <- grid.arrange(p1_grob, p_s_grob, nrow = 1) output <- list(decomp_table, R2_table, decomp_table_s, R2_table_s, p2) names(output) <- c("Variance Partitioning (location)", "R2 Measures (location)", "Variance Partitioning (scale)", "R2 Measures (scale)", "plot") return(output) }