diff --git a/DESCRIPTION b/DESCRIPTION index 14fa516..885b4d5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: xafty -Title: Lightweight package to validate a table -Version: 1.0.0 +Title: Lightweight package to easily validate a table +Version: 1.1.0 Author: David Crone Maintainer: The package maintainer Description: The package is designed to facilitate task automation for diff --git a/NAMESPACE b/NAMESPACE index a0f9c7a..e96a36a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,10 +6,11 @@ export(check_column_notempty) export(check_column_number) export(check_column_types) export(check_validity) +export(filter_column_empty) export(filter_column_exactinput) export(filter_column_patterninput) export(filter_column_type) -export(read_example_data) +importFrom(readxl,read_xlsx) importFrom(stats,setNames) importFrom(utils,read.csv) importFrom(utils,read.csv2) diff --git a/R/check_column_number.R b/R/check_column_number.R index 49ec369..fb4c418 100644 --- a/R/check_column_number.R +++ b/R/check_column_number.R @@ -8,8 +8,10 @@ #' \item "equal": If number of columns in both tables should be equal #' \item "larger": If number of columns in both tables should be equal or larger #' } +#' @param simply Boolean. Changes the return value of the function to TRUE or FALSE, whether the test passes. +#' @return A data.frame if simply is FALSE and a Boolean of length 1 if simply is TRUE #' @export -check_column_number <- function(check_table, validity_table, check_type = "equal") { +check_column_number <- function(check_table, validity_table, check_type = "equal", simply = FALSE) { n_cols_check_table <- ncol(check_table) n_cols_validity_table <- ncol(validity_table) @@ -18,23 +20,30 @@ check_column_number <- function(check_table, validity_table, check_type = "equal result <- n_cols_check_table >= n_cols_validity_table + message_appended <- "Table must have at least" + } else { result <- n_cols_check_table == n_cols_validity_table + message_appended <- "Table must have exactly" + } + if(simply) { - if(result) { + return(result) - message <- paste("ALL GOOD!") + } else if (result) { + + message <- paste("ALL GOOD!") } else { - message <- paste("Rule Broken: Wrong number of columns! Table should have", n_cols_validity_table, "column(s) but has", - n_cols_check_table, "columns.", sep = " ") + message <- paste("Rule Broken: Wrong number of columns!", message_appended, n_cols_validity_table, "column(s) but has", + n_cols_check_table, "columns.", sep = " ") - } + } data.frame("Check" = "Column Number", "Check_Result" = result, "Message" = message, "Columns" = NA) diff --git a/R/filter_column_empty.R b/R/filter_column_empty.R index ff3a00c..29b93c4 100644 --- a/R/filter_column_empty.R +++ b/R/filter_column_empty.R @@ -1,5 +1,10 @@ - +#' @title Filter Values in Column That are NA +#' @param check_table Data Frame. The table that will be checked against the specified rules in the validity table. +#' @param filter_column Character. The column which will be checked. The column must be present in both the check table +#' as well as the validity table +#' @return A logical vector. +#' @export filter_column_empty <- function(check_table, filter_column) { stopifnot(length(filter_column) == 1 & is.character(filter_column)) diff --git a/R/utils.R b/R/utils.R index 789e02f..379435a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -5,8 +5,8 @@ #' @param path_validity Path to the validity table that stores the accompanying xafty rules #' @param file_ending Character. Valid input: "xlsx", "csv_comma", "csv_semicolon". You need to make sure that the file ending #' matches the actual file. +#' @importFrom readxl read_xlsx #' @importFrom utils read.csv read.csv2 -#' @export read_example_data <- function(path_check = "inst/extdata/example_data.xlsx", path_validity = "inst/extdata/example_validity.csv", file_ending = "xlsx") { @@ -269,7 +269,7 @@ is.POSIXct_xafty <- function(datetimes, tz = "") { } -#' @title Convert Passed Values as POSIXct +#' @title Convert Passed Values to POSIXct #' @param datetimes Character vector of date time values to be parsed #' @param tz Timezone for the POSIXct values. Default is UTC as.POSIXct_xafty <- function(datetimes, tz = "") { diff --git a/README.Rmd b/README.Rmd index 53d0a5f..1aa7f1a 100644 --- a/README.Rmd +++ b/README.Rmd @@ -54,7 +54,7 @@ remotes::install_github("davidcrone/xafty") The check table contains the data that will be checked. Usually, it is data read in from a spreadsheet. -In order to explain all the possible rules and their variants, we will use a reduced 'mtcars' data set as an example. +In order to introduce all possible rules and their variants, we will use a reduced 'mtcars' data set as an example. ``` r data(mtcars) @@ -154,16 +154,16 @@ check_column_exactinput(check_table = mtcars, validity_table = validity_table) ### Check for Pattern Values -Checks whether the provided patterns in the validity table match the data in the check table. Several variant rules exist for this check. +Checks whether the provided patterns in a column of the validity table matches to the corresponding column in the check table. Several variant rules exist for the pattern check. **Rule syntax:** | Rule | Explanation | |------------------|--------------------------------------------------------------------------------------------| -| ##!!strictpattern| All values in the check table must match the provided patterns in the validity table | -| ##!!rowpattern | Every row of the column in the check table must match to at least to one pattern in the validity table | -| ##!!anypattern | At least one value in the check table must match to a pattern in the validity table | -| ##!!eachpattern | Every pattern provided in the validity table must match at least once in the check table | +| ##!!strictpattern| All values in the column must match to every pattern provided in the validity table | +| ##!!rowpattern | All values of the column must match at least to one pattern in the validity table | +| ##!!anypattern | At least one value in the column must match to any pattern provided in the validity table | +| ##!!eachpattern | Every pattern provided in the validity table must match at least once to any value in the check table | ``` r validity_table <- data.frame("name" = c("##!!text", "##!!notempty", "##!!rowpattern", "Mazda", "Datsun", "Hornet"), @@ -180,11 +180,13 @@ Basic workflow to check a table for its 'validity' before using it in downstream library(xafty) # Load example data -data <- read_example_data() -check_table <- data$check_table -validity_table <- data$validity_table +data(Indometh) +check_table <- Indometh +validity_table <- data.frame("Subject" = c("##!!factor", "##!!notempty", "##!!strictexact", 1:6), + "time" = c("##!!number", "##!!notempty", "##!!anypattern", "8", rep(NA, 5)), + "conc" = c("##!!number", "##!!notempty", rep(NA, 7))) -# Aligns the data type of the check table as specified in the validity table +# Aligns the data type of the check table as specified data types in the validity table check_table <- align_column_types(check_table = check_table, validity_table = validity_table) # Check if the check table is valid diff --git a/README.md b/README.md index 8bbe85f..0e314cb 100644 --- a/README.md +++ b/README.md @@ -45,8 +45,8 @@ remotes::install_github("davidcrone/xafty") The check table contains the data that will be checked. Usually, it is data read in from a spreadsheet. -In order to explain all the possible rules and their variants, we will -use a reduced ‘mtcars’ data set as an example. +In order to introduce all possible rules and their variants, we will use +a reduced ‘mtcars’ data set as an example. ``` r data(mtcars) @@ -155,17 +155,18 @@ check_column_exactinput(check_table = mtcars, validity_table = validity_table) ### Check for Pattern Values -Checks whether the provided patterns in the validity table match the -data in the check table. Several variant rules exist for this check. +Checks whether the provided patterns in a column of the validity table +matches to the corresponding column in the check table. Several variant +rules exist for the pattern check. **Rule syntax:** -| Rule | Explanation | -|--------------------|--------------------------------------------------------------------------------------------------------| -| \##!!strictpattern | All values in the check table must match the provided patterns in the validity table | -| \##!!rowpattern | Every row of the column in the check table must match to at least to one pattern in the validity table | -| \##!!anypattern | At least one value in the check table must match to a pattern in the validity table | -| \##!!eachpattern | Every pattern provided in the validity table must match at least once in the check table | +| Rule | Explanation | +|--------------------|-------------------------------------------------------------------------------------------------------| +| \##!!strictpattern | All values in the column must match to every pattern provided in the validity table | +| \##!!rowpattern | All values of the column must match at least to one pattern in the validity table | +| \##!!anypattern | At least one value in the column must match to any pattern provided in the validity table | +| \##!!eachpattern | Every pattern provided in the validity table must match at least once to any value in the check table | ``` r validity_table <- data.frame("name" = c("##!!text", "##!!notempty", "##!!rowpattern", "Mazda", "Datsun", "Hornet"), @@ -183,11 +184,13 @@ downstream code: library(xafty) # Load example data -data <- read_example_data() -check_table <- data$check_table -validity_table <- data$validity_table +data(Indometh) +check_table <- Indometh +validity_table <- data.frame("Subject" = c("##!!factor", "##!!notempty", "##!!strictexact", 1:6), + "time" = c("##!!number", "##!!notempty", "##!!anypattern", "8", rep(NA, 5)), + "conc" = c("##!!number", "##!!notempty", rep(NA, 7))) -# Aligns the data type of the check table as specified in the validity table +# Aligns the data type of the check table as specified data types in the validity table check_table <- align_column_types(check_table = check_table, validity_table = validity_table) # Check if the check table is valid diff --git a/man/as.POSIXct_xafty.Rd b/man/as.POSIXct_xafty.Rd index 8a9cb9d..023a841 100644 --- a/man/as.POSIXct_xafty.Rd +++ b/man/as.POSIXct_xafty.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/utils.R \name{as.POSIXct_xafty} \alias{as.POSIXct_xafty} -\title{Convert Passed Values as POSIXct} +\title{Convert Passed Values to POSIXct} \usage{ as.POSIXct_xafty(datetimes, tz = "") } @@ -12,5 +12,5 @@ as.POSIXct_xafty(datetimes, tz = "") \item{tz}{Timezone for the POSIXct values. Default is UTC} } \description{ -Convert Passed Values as POSIXct +Convert Passed Values to POSIXct } diff --git a/man/check_column_number.Rd b/man/check_column_number.Rd index f4ef88c..8ca3a22 100644 --- a/man/check_column_number.Rd +++ b/man/check_column_number.Rd @@ -4,7 +4,12 @@ \alias{check_column_number} \title{Check for Correct Number of Columns} \usage{ -check_column_number(check_table, validity_table, check_type = "equal") +check_column_number( + check_table, + validity_table, + check_type = "equal", + simply = FALSE +) } \arguments{ \item{check_table}{Data Frame. The table that will be checked against the specified rules in the validity table.} @@ -17,6 +22,11 @@ Following Parameters are allowed: \item "equal": If number of columns in both tables should be equal \item "larger": If number of columns in both tables should be equal or larger }} + +\item{simply}{Boolean. Changes the return value of the function to TRUE or FALSE, whether the test passes.} +} +\value{ +A data.frame if simply is FALSE and a Boolean of length 1 if simply is TRUE } \description{ Check for Correct Number of Columns diff --git a/man/filter_column_empty.Rd b/man/filter_column_empty.Rd new file mode 100644 index 0000000..e1beb6a --- /dev/null +++ b/man/filter_column_empty.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/filter_column_empty.R +\name{filter_column_empty} +\alias{filter_column_empty} +\title{Filter Values in Column That are NA} +\usage{ +filter_column_empty(check_table, filter_column) +} +\arguments{ +\item{check_table}{Data Frame. The table that will be checked against the specified rules in the validity table.} + +\item{filter_column}{Character. The column which will be checked. The column must be present in both the check table +as well as the validity table} +} +\value{ +A logical vector. +} +\description{ +Filter Values in Column That are NA +} diff --git a/tests/testthat/test-check_column_number.R b/tests/testthat/test-check_column_number.R index 0b32697..de27694 100644 --- a/tests/testthat/test-check_column_number.R +++ b/tests/testthat/test-check_column_number.R @@ -23,3 +23,41 @@ test_that("Incorrect number of columns is correctly detected", { expect_equal(checK_result$Columns, NA) }) + +test_that("Correct number of columns is correctly detected", { + + check_table <- data.frame("Name" = c("David", "Diana", "Marcel"), + "Age" = c(22, 18, 25)) + validity_table <- data.frame("Name" = "##!!text", "Age" = "##!!number") + + checK_result <- check_column_number(check_table = check_table, validity_table = validity_table, simply = TRUE) + + expect_true(checK_result) + +}) + +test_that("Incorrect number of columns is correctly detected", { + + check_table <- data.frame("Name" = c("David", "Diana", "Marcel"), + "Age" = c(22, 18, 25)) + validity_table <- data.frame("Name" = "##!!text") + + checK_result <- check_column_number(check_table = check_table, validity_table = validity_table, simply = TRUE) + + expect_false(checK_result) + +}) + +test_that("Parameter 'larger' works as expected", { + + check_table <- data.frame("Name" = c("David", "Diana", "Marcel"), + "Age" = c(22, 18, 25), + "Nickname" = c("Davy", "Bogini", "Marcello")) + validity_table <- data.frame("Name" = "##!!text", "Age" = "##!!number") + + checK_result <- check_column_number(check_table = check_table, validity_table = validity_table, check_type = "larger") + + expect_true(checK_result$Check_Result) + expect_equal(checK_result$Columns, NA) + +}) diff --git a/tests/testthat/test-filter_column_patterninput.R b/tests/testthat/test-filter_column_patterninput.R index f24e7cd..6120bf9 100644 --- a/tests/testthat/test-filter_column_patterninput.R +++ b/tests/testthat/test-filter_column_patterninput.R @@ -18,7 +18,7 @@ test_that("Filter for values are correctly identified", { check_table <- data.frame("Name" = as.character(c("David", "Diana", "Marcel")), "Has_Birthday" = factor(c("no", "maybe", "no"))) - validity_table <- data.frame("Name" = c("##!!text", "##!!strictpattern", "a", NA), + validity_table <- data.frame("Name" = c("##!!text", "##!!strictpattern", "a", "i"), "Has_Birthday" = c("##!!factor", "##!!strictpattern", "n", "o")) check_result <- filter_column_patterninput(check_table = check_table, validity_table = validity_table, filter_column = "Has_Birthday")