From f16e4739c1bcfefd20b9ebb35e38223e50787ae0 Mon Sep 17 00:00:00 2001 From: Jan Kapar Date: Thu, 27 Jun 2024 15:07:19 +0200 Subject: [PATCH 1/3] shortcut functions --- NAMESPACE | 3 + R/shortcut_functions.R | 188 +++++++++++++++++++++++++++++++++++++++++ man/darf.Rd | 45 ++++++++++ man/earf.Rd | 44 ++++++++++ man/rarf.Rd | 44 ++++++++++ 5 files changed, 324 insertions(+) create mode 100644 R/shortcut_functions.R create mode 100644 man/darf.Rd create mode 100644 man/earf.Rd create mode 100644 man/rarf.Rd diff --git a/NAMESPACE b/NAMESPACE index 2e4d8b6..c08c972 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,10 +1,13 @@ # Generated by roxygen2: do not edit by hand export(adversarial_rf) +export(darf) +export(earf) export(expct) export(forde) export(forge) export(lik) +export(rarf) import(data.table) import(ranger) import(stringr) diff --git a/R/shortcut_functions.R b/R/shortcut_functions.R new file mode 100644 index 0000000..7b9539d --- /dev/null +++ b/R/shortcut_functions.R @@ -0,0 +1,188 @@ +#' Shortcut likelihood function +#' +#' Calls \code{adversarial_rf}, \code{forde} and \code{lik}. +#' For repeated application, it is faster to save outputs of \code{adversarial_rf} +#' and \code{forde} and pass them via \code{...} or directly use \code{lik}. +#' +#' @param x Input data. Integer variables are recoded as ordered factors with +#' a warning. See Details. +#' @param ... Extra parameters to be passed to \code{adversarial_rf}, \code{forde} +#' and \code{lik}. +#' +#' @return +#' A vector of likelihoods, optionally on the log scale, evaluated at \code{query} +#' (or at \code{x} if \code{query} is \code{NULL}). +#' +#' +#' @references +#' Watson, D., Blesch, K., Kapar, J., & Wright, M. (2023). Adversarial random +#' forests for density estimation and generative modeling. In \emph{Proceedings +#' of the 26th International Conference on Artificial Intelligence and +#' Statistics}, pp. 5357-5375.
+#'
+#'
+#' @details
+#' Only named arguments in \code{...} are forwarded; each one is passed to every
+#' function among \code{adversarial_rf}, \code{forde} and \code{lik} that has a
+#' formal argument of that name. A previously fitted forest or previously
+#' estimated parameters can be reused by passing \code{arf} or \code{params}
+#' via \code{...}, in which case the corresponding step is skipped.
+#'
+#' @examples
+#' # Estimate log-likelihoods
+#' ll <- darf(iris)
+#'
+#' # Partial evidence query
+#' ll <- darf(iris, query = iris[1, 1:3])
+#'
+#' # Condition on Species = "setosa"
+#' ll <- darf(iris, query = iris[1, 1:3], evidence = data.frame(Species = "setosa"))
+#'
+#'
+#' @seealso
+#' \code{\link{arf}}, \code{\link{adversarial_rf}}, \code{\link{forde}}, \code{\link{lik}}
+#'
+#'
+#' @export
+#'
+
+darf <- function(x, query = NULL, ...) {
+  dot_args <- list(...)
+  dot_names <- names(dot_args)
+
+  # Select the named entries of `...` that match a formal argument of `fun`.
+  # names(formals()) is used rather than names(as.list(args())) because the
+  # latter also contains "" (the slot of the function body), which made
+  # unnamed entries of a partly named `...` match every function at once.
+  # `exclude` lists formals this wrapper supplies itself, so that do.call()
+  # never receives the same argument twice.
+  pick_args <- function(fun, exclude = character()) {
+    dot_args[dot_names %in% setdiff(names(formals(fun)), exclude)]
+  }
+
+  arf_args <- pick_args(adversarial_rf, exclude = "x")
+  forde_args <- pick_args(forde, exclude = c("arf", "x"))
+  lik_args <- pick_args(lik, exclude = c("params", "query", "arf"))
+
+  # The forest is fitted silently unless the caller asks otherwise.
+  if (!("verbose" %in% names(arf_args))) arf_args$verbose <- FALSE
+
+  # A fitted forest (`arf`) and/or density parameters (`params`) may be
+  # supplied through `...`; only what is missing is computed.
+  arf <- dot_args[["arf"]]
+  params <- dot_args[["params"]]
+  if (is.null(params)) {
+    if (is.null(arf)) {
+      arf <- do.call(adversarial_rf, c(list(x = x), arf_args))
+    }
+    params <- do.call(forde, c(list(arf = arf, x = x), forde_args))
+  }
+
+  if (is.null(query)) query <- x
+  # Hand the forest to lik() only when one is available; with user-supplied
+  # `params` and no `arf`, lik() falls back to its own default for `arf`.
+  if (!is.null(arf)) lik_args$arf <- arf
+  do.call(lik, c(list(params = params, query = query), lik_args))
+}
+
+
+#' Shortcut sampling function
+#'
+#' Calls \code{adversarial_rf}, \code{forde} and \code{forge}.
+#' For repeated application, it is faster to save outputs of \code{adversarial_rf}
+#' and \code{forde} and pass them via \code{...} or directly use \code{forge}.
+#'
+#' @param x Input data. Integer variables are recoded as ordered factors with
+#' a warning. See Details.
+#' @param ... Extra parameters to be passed to \code{adversarial_rf}, \code{forde}
+#' and \code{forge}.
+#'
+#' @return
+#' A dataset of \code{n_synth} synthetic samples or of \code{nrow(x)} synthetic
+#' samples if \code{n_synth} is undefined.
+#'
+#'
+#' @references
+#' Watson, D., Blesch, K., Kapar, J., & Wright, M. (2023).
Adversarial random
+#' forests for density estimation and generative modeling. In \emph{Proceedings
+#' of the 26th International Conference on Artificial Intelligence and
+#' Statistics}, pp. 5357-5375.
+#'
+#'
+#' @details
+#' Only named arguments in \code{...} are forwarded; each one is passed to every
+#' function among \code{adversarial_rf}, \code{forde} and \code{forge} that has a
+#' formal argument of that name. A previously fitted forest or previously
+#' estimated parameters can be reused by passing \code{arf} or \code{params}
+#' via \code{...}, in which case the corresponding step is skipped.
+#'
+#' @examples
+#' # Generate 150 (size of original iris dataset) synthetic samples from the iris dataset
+#' x_synth <- rarf(iris)
+#'
+#' # Generate 100 synthetic samples from the iris dataset
+#' x_synth <- rarf(iris, n_synth = 100)
+#'
+#' Condition on Species = "setosa"
+#' x_synth <- rarf(iris, evidence = data.frame(Species = "setosa"))
+#'
+#' @seealso
+#' \code{\link{arf}}, \code{\link{adversarial_rf}}, \code{\link{forde}}, \code{\link{forge}}
+#'
+#'
+#' @export
+#'
+
+rarf <- function(x, n_synth = NULL, ...) {
+  dot_args <- list(...)
+  dot_names <- names(dot_args)
+
+  # Select the named entries of `...` that match a formal argument of `fun`.
+  # names(formals()) is used rather than names(as.list(args())) because the
+  # latter also contains "" (the slot of the function body), which made
+  # unnamed entries of a partly named `...` match every function at once.
+  # `exclude` lists formals this wrapper supplies itself, so that do.call()
+  # never receives the same argument twice.
+  pick_args <- function(fun, exclude = character()) {
+    dot_args[dot_names %in% setdiff(names(formals(fun)), exclude)]
+  }
+
+  arf_args <- pick_args(adversarial_rf, exclude = "x")
+  forde_args <- pick_args(forde, exclude = c("arf", "x"))
+  forge_args <- pick_args(forge, exclude = c("params", "n_synth"))
+
+  # The forest is fitted silently unless the caller asks otherwise.
+  if (!("verbose" %in% names(arf_args))) arf_args$verbose <- FALSE
+
+  # Density parameters (`params`) or a fitted forest (`arf`) may be supplied
+  # through `...`; only what is missing is computed.
+  params <- dot_args[["params"]]
+  if (is.null(params)) {
+    arf <- dot_args[["arf"]]
+    if (is.null(arf)) {
+      arf <- do.call(adversarial_rf, c(list(x = x), arf_args))
+    }
+    params <- do.call(forde, c(list(arf = arf, x = x), forde_args))
+  }
+
+  # Default to as many synthetic rows as there are rows in the input data.
+  if (is.null(n_synth)) n_synth <- nrow(x)
+  do.call(forge, c(list(params = params, n_synth = n_synth), forge_args))
+}
+
+
+#' Shortcut expectation function
+#'
+#' Calls \code{adversarial_rf}, \code{forde} and \code{expct}.
+#' For repeated application, it is faster to save outputs of \code{adversarial_rf}
+#' and \code{forde} and pass them via \code{...} or directly use \code{expct}.
+#'
+#' @param x Input data. Integer variables are recoded as ordered factors with
+#' a warning. See Details.
+#' @param ...
Extra parameters to be passed to \code{adversarial_rf}, \code{forde}
+#' and \code{expct}.
+#'
+#' @return
+#' A one row data frame with values for all query variables.
+#'
+#' @details
+#' Only named arguments in \code{...} are forwarded; each one is passed to every
+#' function among \code{adversarial_rf}, \code{forde} and \code{expct} that has a
+#' formal argument of that name. A previously fitted forest or previously
+#' estimated parameters can be reused by passing \code{arf} or \code{params}
+#' via \code{...}, in which case the corresponding step is skipped.
+#'
+#'
+#' @references
+#' Watson, D., Blesch, K., Kapar, J., & Wright, M. (2023). Adversarial random
+#' forests for density estimation and generative modeling. In \emph{Proceedings
+#' of the 26th International Conference on Artificial Intelligence and
+#' Statistics}, pp. 5357-5375.
+#'
+#'
+#' @examples
+#' # What is the expected values of each feature?
+#' earf(iris)
+#'
+#' #' # What is the expected values of Sepal.Length?
+#' earf(iris, "Sepal.Length")
+#'
+#' # What if we condition on Species = "setosa"?
+#' earf(iris, "Sepal.Length", evidence = data.frame(Species = "setosa"))
+#'
+#'
+#' @seealso
+#' \code{\link{arf}}, \code{\link{adversarial_rf}}, \code{\link{forde}}, \code{\link{expct}}
+#'
+#'
+#' @export
+#'
+
+earf <- function(x, query = NULL, ...) {
+  dot_args <- list(...)
+  dot_names <- names(dot_args)
+
+  # Select the named entries of `...` that match a formal argument of `fun`.
+  # names(formals()) is used rather than names(as.list(args())) because the
+  # latter also contains "" (the slot of the function body), which made
+  # unnamed entries of a partly named `...` match every function at once.
+  # `exclude` lists formals this wrapper supplies itself, so that do.call()
+  # never receives the same argument twice.
+  pick_args <- function(fun, exclude = character()) {
+    dot_args[dot_names %in% setdiff(names(formals(fun)), exclude)]
+  }
+
+  arf_args <- pick_args(adversarial_rf, exclude = "x")
+  forde_args <- pick_args(forde, exclude = c("arf", "x"))
+  expct_args <- pick_args(expct, exclude = c("params", "query"))
+
+  # The forest is fitted silently unless the caller asks otherwise.
+  if (!("verbose" %in% names(arf_args))) arf_args$verbose <- FALSE
+
+  # Density parameters (`params`) or a fitted forest (`arf`) may be supplied
+  # through `...`; only what is missing is computed.
+  params <- dot_args[["params"]]
+  if (is.null(params)) {
+    arf <- dot_args[["arf"]]
+    if (is.null(arf)) {
+      arf <- do.call(adversarial_rf, c(list(x = x), arf_args))
+    }
+    params <- do.call(forde, c(list(arf = arf, x = x), forde_args))
+  }
+
+  # Without an explicit query, ask for every column of the input data.
+  if (is.null(query)) query <- names(x)
+  do.call(expct, c(list(params = params, query = query), expct_args))
+}
diff --git a/man/darf.Rd b/man/darf.Rd
new file mode 100644
index 0000000..397593c
--- /dev/null
+++ b/man/darf.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/oneline_functions.R
+\name{darf}
+\alias{darf}
+\title{Shortcut likelihood function}
+\usage{
+darf(x, query = NULL, ...)
+}
+\arguments{
+\item{x}{Input data. Integer variables are recoded as ordered factors with
+a warning. See Details.}
+
+\item{...}{Extra parameters to be passed to \code{adversarial_rf}, \code{forde}
+and \code{lik}.}
+}
+\value{
+A vector of likelihoods, optionally on the log scale. A dataset of \code{n_synth} synthetic samples or of \code{nrow(x)} synthetic
+samples if \code{n_synth} is undefined.
+}
+\description{
+Calls \code{adversarial_rf}, \code{forde} and \code{lik}.
+For repeated application, it is faster to save outputs of \code{adversarial_rf}
+and \code{forde} and pass them via \code{...} or directly use \code{lik}.
+}
+\examples{
+# Estimate log-likelihoods
+ll <- darf(iris)
+
+# Partial evidence query
+ll <- darf(iris, query = iris[1, 1:3])
+
+# Condition on Species = "setosa"
+ll <- darf(iris, query = iris[1, 1:3], evidence = data.frame(Species = "setosa"))
+
+
+}
+\references{
+Watson, D., Blesch, K., Kapar, J., & Wright, M. (2023). Adversarial random
+forests for density estimation and generative modeling.
In \emph{Proceedings +of the 26th International Conference on Artificial Intelligence and +Statistics}, pp. 5357-5375. +} +\seealso{ +\code{\link{arf}}, \code{\link{adversarial_rf}}, \code{\link{forde}}, \code{\link{forge}} +} diff --git a/man/earf.Rd b/man/earf.Rd new file mode 100644 index 0000000..e578c4d --- /dev/null +++ b/man/earf.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/oneline_functions.R +\name{earf} +\alias{earf} +\title{Shortcut expectation function} +\usage{ +earf(x, query = NULL, ...) +} +\arguments{ +\item{x}{Input data. Integer variables are recoded as ordered factors with +a warning. See Details.} + +\item{...}{Extra parameters to be passed to \code{adversarial_rf}, \code{forde} +and \code{expct}.} +} +\value{ +A one row data frame with values for all query variables. +} +\description{ +Calls \code{adversarial_rf}, \code{forde} and \code{expct}. +For repeated application, it is faster to save outputs of \code{adversarial_rf} +and \code{forde} and pass them via \code{...} or directly use \code{expct}. +} +\examples{ +# What is the expected values of each feature? +earf(iris) + +#' # What is the expected values of Sepal.Length? +earf(iris, "Sepal.Length") + +# What if we condition on Species = "setosa"? +earf(iris, "Sepal.Length", evidence = data.frame(Species = "setosa")) + + +} +\references{ +Watson, D., Blesch, K., Kapar, J., & Wright, M. (2023). Adversarial random +forests for density estimation and generative modeling. In \emph{Proceedings +of the 26th International Conference on Artificial Intelligence and +Statistics}, pp. 5357-5375. 
+} +\seealso{ +\code{\link{arf}}, \code{\link{adversarial_rf}}, \code{\link{forde}}, \code{\link{expct}} +} diff --git a/man/rarf.Rd b/man/rarf.Rd new file mode 100644 index 0000000..7811864 --- /dev/null +++ b/man/rarf.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/oneline_functions.R +\name{rarf} +\alias{rarf} +\title{Shortcut sampling function} +\usage{ +rarf(x, n_synth = NULL, ...) +} +\arguments{ +\item{x}{Input data. Integer variables are recoded as ordered factors with +a warning. See Details.} + +\item{...}{Extra parameters to be passed to \code{adversarial_rf}, \code{forde} +and \code{forge}.} +} +\value{ +A dataset of \code{n_synth} synthetic samples or of \code{nrow(x)} synthetic +samples if \code{n_synth} is undefined. +} +\description{ +Calls \code{adversarial_rf}, \code{forde} and \code{forge}. +For repeated application, it is faster to save outputs of \code{adversarial_rf} +and \code{forde} and pass them via \code{...} or directly use \code{forge}. +} +\examples{ +# Generate 150 (size of original iris dataset) synthetic samples from the iris dataset +x_synth <- rarf(iris) + +# Generate 100 synthetic samples from the iris dataset +x_synth <- rarf(iris, n_synth = 100) + +Condition on Species = "setosa" +x_synth <- rarf(iris, evidence = data.frame(Species = "setosa")) + +} +\references{ +Watson, D., Blesch, K., Kapar, J., & Wright, M. (2023). Adversarial random +forests for density estimation and generative modeling. In \emph{Proceedings +of the 26th International Conference on Artificial Intelligence and +Statistics}, pp. 5357-5375. 
+} +\seealso{ +\code{\link{arf}}, \code{\link{adversarial_rf}}, \code{\link{forde}}, \code{\link{forge}} +} From 975de1170fe757eb127a6191e52b52493371072a Mon Sep 17 00:00:00 2001 From: Jan Kapar Date: Thu, 27 Jun 2024 15:44:43 +0200 Subject: [PATCH 2/3] doc --- R/shortcut_functions.R | 8 +++++--- man/darf.Rd | 5 ++++- man/earf.Rd | 4 ++-- man/rarf.Rd | 5 ++++- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/R/shortcut_functions.R b/R/shortcut_functions.R index 7b9539d..cada69f 100644 --- a/R/shortcut_functions.R +++ b/R/shortcut_functions.R @@ -6,6 +6,8 @@ #' #' @param x Input data. Integer variables are recoded as ordered factors with #' a warning. See Details. +#' @param query Data frame of samples, optionally comprising just a subset of +#' training features. See Details of \code{lik}. Is set to \code{x} if \code{NULL}. #' @param ... Extra parameters to be passed to \code{adversarial_rf}, \code{forde} #' and \code{lik}. #' @@ -71,6 +73,8 @@ darf <- function(x, query = NULL, ...) { #' #' @param x Input data. Integer variables are recoded as ordered factors with #' a warning. See Details. +#' @param n_synth Number of synthetic samples to generate. Is set to \code{nrow(x)} if +#' \code{NULL}. #' @param ... Extra parameters to be passed to \code{adversarial_rf}, \code{forde} #' and \code{forge}. #' @@ -165,7 +169,7 @@ rarf <- function(x, n_synth = NULL, ...) { #' @export #' -earf <- function(x, query = NULL, ...) { +earf <- function(x, ...) { arg_names <- list(arf = names(as.list(args(adversarial_rf))), forde = names(as.list(args(forde))), expct = names(as.list(args(expct)))) @@ -180,9 +184,7 @@ earf <- function(x, query = NULL, ...)
{ if (!("params" %in% names(forde_args))) params <- do.call(forde, c(arf = list(arf), x = list(x), forde_args)) - if (is.null(query)) query <- names(x) do.call(expct, c(params = list(params), - query = list(query), expct_args)) } diff --git a/man/darf.Rd b/man/darf.Rd index 397593c..e48ef95 100644 --- a/man/darf.Rd +++ b/man/darf.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/oneline_functions.R +% Please edit documentation in R/shortcut_functions.R \name{darf} \alias{darf} \title{Shortcut likelihood function} @@ -10,6 +10,9 @@ darf(x, query = NULL, ...) \item{x}{Input data. Integer variables are recoded as ordered factors with a warning. See Details.} +\item{query}{Data frame of samples, optionally comprising just a subset of +training features. See Details of \code{lik}. Is set to \code{x} if \code{zero}.} + \item{...}{Extra parameters to be passed to \code{adversarial_rf}, \code{forde} and \code{lik}.} } diff --git a/man/earf.Rd b/man/earf.Rd index e578c4d..09576bd 100644 --- a/man/earf.Rd +++ b/man/earf.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/oneline_functions.R +% Please edit documentation in R/shortcut_functions.R \name{earf} \alias{earf} \title{Shortcut expectation function} \usage{ -earf(x, query = NULL, ...) +earf(x, ...) } \arguments{ \item{x}{Input data. Integer variables are recoded as ordered factors with diff --git a/man/rarf.Rd b/man/rarf.Rd index 7811864..ef3478f 100644 --- a/man/rarf.Rd +++ b/man/rarf.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/oneline_functions.R +% Please edit documentation in R/shortcut_functions.R \name{rarf} \alias{rarf} \title{Shortcut sampling function} @@ -10,6 +10,9 @@ rarf(x, n_synth = NULL, ...) \item{x}{Input data. Integer variables are recoded as ordered factors with a warning. See Details.} +\item{n_synth}{Number of synthetic samples to generate. 
Is set to \code{nrow(x)} if +\code{NULL}.} + \item{...}{Extra parameters to be passed to \code{adversarial_rf}, \code{forde} and \code{forge}.} } From ad10f83486af2176a7f0e0b48f45ceece1842432 Mon Sep 17 00:00:00 2001 From: Jan Kapar Date: Thu, 27 Jun 2024 16:10:47 +0200 Subject: [PATCH 3/3] docs again --- R/shortcut_functions.R | 6 +++--- man/earf.Rd | 4 ++-- man/rarf.Rd | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/shortcut_functions.R b/R/shortcut_functions.R index cada69f..5bd7742 100644 --- a/R/shortcut_functions.R +++ b/R/shortcut_functions.R @@ -97,7 +97,7 @@ darf <- function(x, query = NULL, ...) { #' # Generate 100 synthetic samples from the iris dataset #' x_synth <- rarf(iris, n_synth = 100) #' -#' Condition on Species = "setosa" +#' # Condition on Species = "setosa" #' x_synth <- rarf(iris, evidence = data.frame(Species = "setosa")) #' #' @seealso @@ -156,10 +156,10 @@ rarf <- function(x, n_synth = NULL, ...) { #' earf(iris) #' #' #' # What is the expected values of Sepal.Length? -#' earf(iris, "Sepal.Length") +#' earf(iris, query = "Sepal.Length") #' #' # What if we condition on Species = "setosa"? -#' earf(iris, "Sepal.Length", evidence = data.frame(Species = "setosa")) +#' earf(iris, query = "Sepal.Length", evidence = data.frame(Species = "setosa")) #' #' #' @seealso diff --git a/man/earf.Rd b/man/earf.Rd index 09576bd..478e1db 100644 --- a/man/earf.Rd +++ b/man/earf.Rd @@ -26,10 +26,10 @@ and \code{forde} and pass them via \code{...} or directly use \code{expct}. earf(iris) #' # What is the expected values of Sepal.Length? -earf(iris, "Sepal.Length") +earf(iris, query = "Sepal.Length") # What if we condition on Species = "setosa"? 
-earf(iris, "Sepal.Length", evidence = data.frame(Species = "setosa")) +earf(iris, query = "Sepal.Length", evidence = data.frame(Species = "setosa")) } diff --git a/man/rarf.Rd b/man/rarf.Rd index ef3478f..755910e 100644 --- a/man/rarf.Rd +++ b/man/rarf.Rd @@ -32,7 +32,7 @@ x_synth <- rarf(iris) # Generate 100 synthetic samples from the iris dataset x_synth <- rarf(iris, n_synth = 100) -Condition on Species = "setosa" +# Condition on Species = "setosa" x_synth <- rarf(iris, evidence = data.frame(Species = "setosa")) }