From 72702240871a4c498e3fd5131ad69cde62b3615c Mon Sep 17 00:00:00 2001 From: Ken Kellner Date: Wed, 23 Oct 2024 15:13:40 -0400 Subject: [PATCH 1/2] Factor random effects seem to be working --- R/mixedModelTools.R | 66 ++++++++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/R/mixedModelTools.R b/R/mixedModelTools.R index 640d1d2..abecc96 100644 --- a/R/mixedModelTools.R +++ b/R/mixedModelTools.R @@ -7,10 +7,10 @@ get_xlev <- function(data, model_frame){ get_reTrms <- function(formula, data, newdata=NULL){ fb <- reformulas::findbars(formula) mf <- model.frame(reformulas::subbars(formula), data, na.action=stats::na.pass) - if(is.null(newdata)) return(reformulas::mkReTrms(fb, mf)) + if(is.null(newdata)) return(reformulas::mkReTrms(fb, mf, calc.lambdat = FALSE)) new_mf <- model.frame(stats::terms(mf), newdata, na.action=stats::na.pass, xlev=get_xlev(data, mf)) - reformulas::mkReTrms(fb, new_mf, drop.unused.levels=FALSE) + reformulas::mkReTrms(fb, new_mf, drop.unused.levels=FALSE, calc.lambdat = FALSE) } get_Z <- function(formula, data, newdata=NULL){ @@ -21,25 +21,38 @@ get_Z <- function(formula, data, newdata=NULL){ return(Matrix::Matrix(matrix(0, nrow=nrow(newdata), ncol=0),sparse=TRUE)) } } - check_formula(formula, data) - Zt <- get_reTrms(formula, data, newdata)$Zt - Z <- t(as.matrix(Zt)) - Matrix::Matrix(Z, sparse=TRUE) + check_formula(formula) + Zt_list <- get_reTrms(formula, data, newdata)$Ztlist + + # Reorder rows by random effect instead of by level + # so d d d e e e --> d e d e d e + # this should only change things for factors + out <- lapply(Zt_list, function(x){ + id <- ave(rownames(x) == rownames(x), rownames(x), FUN=cumsum) + x[order(id),,drop=FALSE] + }) + Zt <- do.call(rbind, out) + Matrix::t(Zt) } -get_group_vars <- function(formula){ +get_group_vars <- function(formula, data){ rand <- reformulas::findbars(formula) - ifelse(is.null(rand), 0, length(rand)) + if(is.null(rand)) return(0) + re <- get_reTrms(formula, data) + length(unlist(re$cnms)) } get_nrandom <- function(formula, data){ rand <- reformulas::findbars(formula) if(length(rand)==0) return(as.array(0)) + + re <- get_reTrms(formula, data) - out <- sapply(rand, function(x){ - col_nm <- as.character(x[[3]]) - length(unique(data[[col_nm]])) + col_nms <- rep(names(re$cnms), sapply(re$cnms, length)) + out <- sapply(col_nms, function(x){ + length(unique(data[[x]])) }) + as.array(out) } @@ -56,23 +69,32 @@ sigma_names <- function(formula, data){ nms } -check_formula <- function(formula, data){ +form_has_correlated_effects <- function(formula){ + bars <- reformulas::findbars(formula) + if(is.null(bars)) return(FALSE) + out <- sapply(bars, function(x){ + form <- stats::as.formula(as.call(list(as.name("~"), x[[2]]))) + terms_obj <- stats::terms(form) + terms <- attr(terms_obj, "term.labels") + if(attr(terms_obj, "intercept")) terms <- c("1", terms) + length(terms) > 1 + }) + any(out) +} + +check_formula <- function(formula){ rand <- reformulas::findbars(formula) if(is.null(rand)) return(invisible()) + if(form_has_correlated_effects(formula)){ + stop("Correlated random slopes and intercepts are not supported. Replace | with || in your formula(s).", call.=FALSE) + } + char <- paste(formula, collapse=" ") if(grepl(":|/", char)){ stop("Nested random effects (using / and :) are not supported", call.=FALSE) } - theta <- get_reTrms(formula, data)$theta - if(0 %in% theta){ - stop("Failed to create random effects model matrix.\n -Possible reasons:\n -(1) Correlated slopes and intercepts are not supported. Replace | with || in your formula(s).\n -(2) You have specified random slopes for an R factor variable. Try converting the variable to a series of indicator variables instead.", - call.=FALSE) - } } split_formula <- function(formula){ @@ -126,7 +148,7 @@ use_tmb_bootstrap <- function(mod, type, re.form){ # Gather information about grouping variables for a given submodel get_randvar_info <- function(tmb_report, type, formula, data){ - ngv <- get_group_vars(formula) + ngv <- get_group_vars(formula, data) if(ngv == 0) return(list()) #Return blank list if there are no grouping variables sigma_type <- paste0("lsigma_",type) @@ -305,7 +327,7 @@ setMethod("randomTerms", "unmarkedFit", function(object, type, level=0.95, ...){ get_ranef_inputs <- function(forms, datalist, dms, Zs){ stopifnot(!is.null(names(datalist))) mods <- names(datalist) - ngv <- lapply(forms, get_group_vars) + ngv <- mapply(get_group_vars, forms, datalist) names(ngv) <- paste0("n_group_vars_",mods) ngroup <- mapply(get_nrandom, forms, datalist, SIMPLIFY=FALSE) names(ngroup) <- paste0("n_grouplevels_",mods) From 0fefc9111160cac31747d881821dea7b7bb9bc2c Mon Sep 17 00:00:00 2001 From: Ken Kellner Date: Fri, 25 Oct 2024 13:02:49 -0400 Subject: [PATCH 2/2] Fix methods and add tests --- DESCRIPTION | 4 +- R/mixedModelTools.R | 30 ++++--- tests/testthat/test_random_effects.R | 120 +++++++++++++++++++++++++++ 3 files changed, 140 insertions(+), 14 deletions(-) create mode 100644 tests/testthat/test_random_effects.R diff --git a/DESCRIPTION b/DESCRIPTION index b0b3814..c604cd4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: unmarked -Version: 1.4.3.9004 -Date: 2024-09-19 +Version: 1.4.3.9005 +Date: 2024-10-25 Type: Package Title: Models for Data from Unmarked Animals Authors@R: c( diff --git a/R/mixedModelTools.R b/R/mixedModelTools.R index abecc96..b249bac 100644 --- a/R/mixedModelTools.R +++ b/R/mixedModelTools.R @@ -28,7 +28,7 @@ get_Z <- function(formula, data, newdata=NULL){ # so d d d e e e --> d e d e d e # this should only change things for factors out <- lapply(Zt_list, function(x){ - id <- ave(rownames(x) == rownames(x), rownames(x), FUN=cumsum) + id <- stats::ave(rownames(x) == rownames(x), rownames(x), FUN=cumsum) x[order(id),,drop=FALSE] }) Zt <- do.call(rbind, out) @@ -89,8 +89,11 @@ check_formula <- function(formula){ if(form_has_correlated_effects(formula)){ stop("Correlated random slopes and intercepts are not supported. Replace | with || in your formula(s).", call.=FALSE) } + + rhs <- lapply(rand, function(x) x[[3]]) + rhs_all <- lapply(rhs, function(x) paste(deparse(x), collapse="")) - char <- paste(formula, collapse=" ") + char <- paste(rhs_all, collapse=" ") if(grepl(":|/", char)){ stop("Nested random effects (using / and :) are not supported", call.=FALSE) @@ -156,11 +159,12 @@ get_randvar_info <- function(tmb_report, type, formula, data){ sigma_est <- tmb_report$par.fixed[sigma_ind] sigma_cov <- as.matrix(tmb_report$cov.fixed[sigma_ind,sigma_ind]) re <- get_reTrms(formula, data) + Z <- get_Z(formula, data) # inefficient!! list(names=sigma_names(formula, data), estimates=sigma_est, covMat=sigma_cov, invlink="exp", invlinkGrad="exp", n_obs=nrow(data), n_levels=lapply(re$flist, function(x) length(levels(x))), cnms=re$cnms, - levels=rownames(re$Zt)) + levels=colnames(Z)) } get_fixed_names <- function(tmb_report){ @@ -175,7 +179,8 @@ print_randvar_info <- function(object){ val <- do.call(object$invlink, list(object$estimates)) - disp <- data.frame(Groups=names(object$cnms), Name=unlist(object$cnms), + disp <- data.frame(Groups=rep(names(object$cnms), sapply(object$cnms, length)), + Name=unlist(object$cnms), Variance=round(val^2,3), Std.Dev.=round(val,3)) cat("Random effects:\n") print(disp, row.names=FALSE) @@ -246,7 +251,7 @@ setMethod("sigma", "unmarkedEstimate", function(object, level=0.95, ...){ ses <- sqrt(diag(rinf$covMat)) lower <- vals - z*ses upper <- vals + z*ses - Groups <- names(rinf$cnms) + Groups <- rep(names(rinf$cnms), sapply(rinf$cnms, length)) Name <- unlist(rinf$cnms) data.frame(Model=object@short.name, Groups=Groups, Name=Name, sigma=exp(vals), lower=exp(lower), upper=exp(upper)) @@ -280,16 +285,17 @@ setMethod("randomTerms", "unmarkedEstimate", function(object, level=0.95, ...){ stop("No random effects in this submodel", call.=FALSE) } - Groups <- lapply(1:length(rv$cnms), function(x){ - gn <- names(rv$cnms)[x] - rep(gn, rv$n_levels[[gn]]) + gname <- rep(names(rv$cnms), sapply(rv$cnms, length)) + + Groups <- lapply(1:length(gname), function(x){ + rep(gname[x], rv$n_levels[[gname[x]]]) }) Groups <- do.call(c, Groups) - Name <- lapply(1:length(rv$cnms), function(x){ - gn <- names(rv$cnms)[x] - var <- rv$cnms[[x]] - rep(var, rv$n_levels[[gn]]) + Name <- unlist(rv$cnms) + + Name <- lapply(1:length(Name), function(x){ + rep(Name[x], rv$n_levels[[gname[x]]]) }) Name <- do.call(c, Name) diff --git a/tests/testthat/test_random_effects.R b/tests/testthat/test_random_effects.R new file mode 100644 index 0000000..3cacfc9 --- /dev/null +++ b/tests/testthat/test_random_effects.R @@ -0,0 +1,120 @@ +context("random effects tools") +skip_on_cran() + +set.seed(123) +M <- 10 +J <- 3 + +sc <- data.frame(x1 = rnorm(M), + x2 = factor(sample(letters[1:2], M, replace=TRUE)), + x3 = factor(sample(letters[3:4], M, replace=TRUE)), + g = factor(sample(letters[5:7], M, replace=TRUE))) + +test_that("random factor slopes work", { + + test1 <- get_Z(~x1 + x2 + (x1 + x2 || g), sc) + expect_is(test1, "dgCMatrix") + + test1 <- as.matrix(test1) + expect_equal(colnames(test1), rep(letters[5:7], 4)) + + expect_equal(test1, structure(c(0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, +0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1.55870831414912, +0, 0.129287735160946, 1.71506498688328, 0, 0, 0, -0.445661970099958, +0, -0.23017748948328, 0, 0, 0, 0, 0.460916205989202, 0, 0, 0, +-0.560475646552213, 0, 0, 0.070508391424576, 0, 0, 0, -1.26506123460653, +-0.686852851893526, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 1, 0, 0), dim = c(10L, 12L), dimnames = list(c("1", "2", +"3", "4", "5", "6", "7", "8", "9", "10"), c("e", "f", "g", "e", +"f", "g", "e", "f", "g", "e", "f", "g")))) + + sig <- sigma_names(~x1 + x2 + (x1 + x2 || g), sc) + expect_equal(sig, + c("1|g", "x1|g", "x2a|g", "x2b|g")) + + nr <- get_nrandom(~x1 + x2 + (x1 + x2 || g), sc) + expect_equal(length(nr), length(sig)) + expect_equal(sum(nr), ncol(test1)) + + test2 <- as.matrix(get_Z(~0 + x2:x3 + (0 + x2:x3 | g), sc)) + sig <- sigma_names(~0 + x2:x3 + (0 + x2:x3 | g), sc) + expect_equal(sig, c("x2a:x3c|g", "x2b:x3c|g", "x2a:x3d|g", "x2b:x3d|g")) + + expect_equal(test2, structure(c(0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, +0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, +1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), dim = c(10L, +12L), dimnames = list(c("1", "2", "3", "4", "5", "6", "7", "8", +"9", "10"), c("e", "f", "g", "e", "f", "g", "e", "f", "g", "e", +"f", "g")))) + + nr <- get_nrandom(~0 + x2:x3 + (0 + x2:x3 | g), sc) + expect_equal(length(nr), length(sig)) + expect_equal(sum(nr), ncol(test2)) +}) + +test_that("random factor slope estimates work", { +skip_on_ci() + +M <- 10000 +J <- 3 + +set.seed(123) +sc <- data.frame(x1 = rnorm(M), + x2 = factor(sample(letters[1:2], M, replace=TRUE)), + x3 = factor(sample(letters[3:4], M, replace=TRUE)), + g = factor(sample(letters[5:26], M, replace=TRUE))) + +X <- model.matrix(~x1 + x2, sc) + +b <- c(-0.3, 0.3, 0.5) + +b2 <- c(-0.3, 0.3, 0, 0.5) + +re <- lapply(1:length(b2), function(i){ + rnorm(length(levels(sc$g)), 0, 0.2) +}) + +Z <- as.matrix(unmarked:::get_Z(~x1 + x2 + (x1 + x2 ||g), sc)) + +Xall <- cbind(X, Z) + +ball <- c(b, unlist(re)) + +psi <- plogis(Xall %*% ball) +z <- rbinom(M, 1, psi) + +y <- matrix(NA, M, J) +for (i in 1:M){ + y[i,] <- rbinom(3, 1, 0.4) * z[i] +} + +umf <- unmarkedFrameOccu(y=y, siteCovs=sc) + +fit <- occu(~1~x1 + x2 + (x1 + x2 || g), umf) + +expect_equivalent(coef(fit), + c(b, qlogis(0.4)), tol=0.1) + +expect_equivalent(sigma(fit)$sigma, + c(0.2865,0.1859,0.3004,0.1928), tol=1e-4) + +re_est <- randomTerms(fit) +expect_equal(re_est$Level, rep(levels(sc$g), 4)) + +pr <- predict(fit, 'state', level=NULL)$Predicted + +expect_true(cor(psi, pr) > 0.95) + +# Two grouping variables +fit2 <- occu(~1~x1 + x2 + (x1 + x2 || g) + (1|x3), umf[1:100,]) + +expect_equal(nrow(sigma(fit2)), 5) + +re <- randomTerms(fit2) +expect_equal(re$Groups[89:90], c("x3", "x3")) +})