added new simply argument to column number test; exported filter_empt…

…y function; made readme more comprehensible
davidcrone · Oct 28, 2023 · 38393ad · 38393ad
1 parent c259ed2
commit 38393ad
Show file tree

Hide file tree

Showing 12 changed files with 128 additions and 40 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: xafty
-Title: Lightweight package to validate a table
-Version: 1.0.0
+Title: Lightweight package to easily validate a table
+Version: 1.1.0
 Author: David Crone
 Maintainer: The package maintainer <davidjvcrone@gmail.com>
 Description: The package is designed to facilitate task automation for

diff --git a/NAMESPACE b/NAMESPACE
@@ -6,10 +6,11 @@ export(check_column_notempty)
 export(check_column_number)
 export(check_column_types)
 export(check_validity)
+export(filter_column_empty)
 export(filter_column_exactinput)
 export(filter_column_patterninput)
 export(filter_column_type)
-export(read_example_data)
+importFrom(readxl,read_xlsx)
 importFrom(stats,setNames)
 importFrom(utils,read.csv)
 importFrom(utils,read.csv2)
diff --git a/R/check_column_number.R b/R/check_column_number.R
@@ -8,8 +8,10 @@
 #'  \item "equal": If number of columns in both tables should be equal
 #'  \item "larger": If number of columns in both tables should be equal or larger
 #' }
+#' @param simply Boolean. Changes the return value of the function to TRUE or FALSE, whether the test passes.
+#' @return A data.frame if simply is FALSE and a Boolean of length 1 if simply is TRUE
 #' @export
-check_column_number <- function(check_table, validity_table, check_type = "equal") {
+check_column_number <- function(check_table, validity_table, check_type = "equal", simply = FALSE) {
 
   n_cols_check_table <- ncol(check_table)
   n_cols_validity_table <- ncol(validity_table)
@@ -18,23 +20,30 @@ check_column_number <- function(check_table, validity_table, check_type = "equal
 
     result <- n_cols_check_table >= n_cols_validity_table
 
+    message_appended <- "Table must have at least"
+
   } else {
 
     result <- n_cols_check_table == n_cols_validity_table
 
+    message_appended <- "Table must have exactly"
+
   }
 
+  if(simply) {
 
-  if(result) {
+    return(result)
 
-    message <- paste("ALL GOOD!")
+  } else if (result) {
+
+      message <- paste("ALL GOOD!")
 
   } else {
 
-    message <- paste("Rule Broken: Wrong number of columns! Table should have", n_cols_validity_table, "column(s) but has",
-                     n_cols_check_table, "columns.", sep = " ")
+      message <- paste("Rule Broken: Wrong number of columns!", message_appended, n_cols_validity_table, "column(s) but has",
+                       n_cols_check_table, "columns.", sep = " ")
 
-  }
+    }
 
   data.frame("Check" = "Column Number", "Check_Result" = result, "Message" = message, "Columns" = NA)
 

diff --git a/R/filter_column_empty.R b/R/filter_column_empty.R
@@ -1,5 +1,10 @@
 
-
+#' @title Filter Values in Column That are NA
+#' @param check_table Data Frame. The table that will be checked against the specified rules in the validity table.
+#' @param filter_column Character. The column which will be checked. The column must be present in both the check table
+#' as well as the validity table
+#' @return A logical vector.
+#' @export
 filter_column_empty <- function(check_table, filter_column) {
 
   stopifnot(length(filter_column) == 1 & is.character(filter_column))

diff --git a/R/utils.R b/R/utils.R
@@ -5,8 +5,8 @@
 #' @param path_validity Path to the validity table that stores the accompanying xafty rules
 #' @param file_ending Character. Valid input: "xlsx", "csv_comma", "csv_semicolon". You need to make sure that the file ending
 #' matches the actual file.
+#' @importFrom readxl read_xlsx
 #' @importFrom utils read.csv read.csv2
-#' @export
 read_example_data <- function(path_check = "inst/extdata/example_data.xlsx",
                                     path_validity = "inst/extdata/example_validity.csv", file_ending = "xlsx") {
 
@@ -269,7 +269,7 @@ is.POSIXct_xafty <- function(datetimes, tz = "") {
 
 }
 
-#' @title Convert Passed Values as POSIXct
+#' @title Convert Passed Values to POSIXct
 #' @param datetimes Character vector of date time values to be parsed
 #' @param tz Timezone for the POSIXct values. Default is UTC
 as.POSIXct_xafty <- function(datetimes, tz = "") {

diff --git a/README.Rmd b/README.Rmd
@@ -54,7 +54,7 @@ remotes::install_github("davidcrone/xafty")
 
 The check table contains the data that will be checked. Usually, it is data read in from a spreadsheet. 
 
-In order to explain all the possible rules and their variants, we will use a reduced 'mtcars' data set as an example.
+In order to introduce all possible rules and their variants, we will use a reduced 'mtcars' data set as an example.
 
 ``` r 
 data(mtcars)
@@ -154,16 +154,16 @@ check_column_exactinput(check_table = mtcars, validity_table = validity_table)
 
 ### Check for Pattern Values
 
-Checks whether the provided patterns in the validity table match the data in the check table. Several variant rules exist for this check. 
+Checks whether the provided patterns in a column of the validity table matches to the corresponding column in the check table. Several variant rules exist for the pattern check. 
 
 **Rule syntax:**
 
 | Rule             | Explanation                                                                                |
 |------------------|--------------------------------------------------------------------------------------------|
-| ##!!strictpattern| All values in the check table must match the provided patterns in the validity table       | 
-| ##!!rowpattern   | Every row of the column in the check table must match to at least to one pattern in the validity table |
-| ##!!anypattern   | At least one value in the check table must match to a pattern in the validity table        |
-| ##!!eachpattern  | Every pattern provided in the validity table must match at least once in the check table   |
+| ##!!strictpattern| All values in the column must match to every pattern provided in the validity table        | 
+| ##!!rowpattern   | All values of the column must match at least to one pattern in the validity table          |
+| ##!!anypattern   | At least one value in the column must match to any pattern provided in the validity table  |
+| ##!!eachpattern  | Every pattern provided in the validity table must match at least once to any value in the check table   |
 
 ``` r
 validity_table <- data.frame("name" = c("##!!text", "##!!notempty", "##!!rowpattern", "Mazda", "Datsun", "Hornet"), 
@@ -180,11 +180,13 @@ Basic workflow to check a table for its 'validity' before using it in downstream
 library(xafty)
 
 # Load example data
-data <- read_example_data() 
-check_table <- data$check_table
-validity_table <- data$validity_table
+data(Indometh)
+check_table <- Indometh
+validity_table <- data.frame("Subject" = c("##!!factor", "##!!notempty", "##!!strictexact", 1:6),
+                             "time" = c("##!!number", "##!!notempty", "##!!anypattern", "8", rep(NA, 5)),
+                             "conc" = c("##!!number", "##!!notempty", rep(NA, 7)))
 
-# Aligns the data type of the check table as specified in the validity table
+# Aligns the data type of the check table as specified data types in the validity table
 check_table <- align_column_types(check_table = check_table, validity_table = validity_table)
 
 # Check if the check table is valid

diff --git a/README.md b/README.md
@@ -45,8 +45,8 @@ remotes::install_github("davidcrone/xafty")
 The check table contains the data that will be checked. Usually, it is
 data read in from a spreadsheet.
 
-In order to explain all the possible rules and their variants, we will
-use a reduced ‘mtcars’ data set as an example.
+In order to introduce all possible rules and their variants, we will use
+a reduced ‘mtcars’ data set as an example.
 
 ``` r
 data(mtcars)
@@ -155,17 +155,18 @@ check_column_exactinput(check_table = mtcars, validity_table = validity_table)
 
 ### Check for Pattern Values
 
-Checks whether the provided patterns in the validity table match the
-data in the check table. Several variant rules exist for this check.
+Checks whether the provided patterns in a column of the validity table
+matches to the corresponding column in the check table. Several variant
+rules exist for the pattern check.
 
 **Rule syntax:**
 
-| Rule               | Explanation                                                                                            |
-|--------------------|--------------------------------------------------------------------------------------------------------|
-| \##!!strictpattern | All values in the check table must match the provided patterns in the validity table                   |
-| \##!!rowpattern    | Every row of the column in the check table must match to at least to one pattern in the validity table |
-| \##!!anypattern    | At least one value in the check table must match to a pattern in the validity table                    |
-| \##!!eachpattern   | Every pattern provided in the validity table must match at least once in the check table               |
+| Rule               | Explanation                                                                                           |
+|--------------------|-------------------------------------------------------------------------------------------------------|
+| \##!!strictpattern | All values in the column must match to every pattern provided in the validity table                   |
+| \##!!rowpattern    | All values of the column must match at least to one pattern in the validity table                     |
+| \##!!anypattern    | At least one value in the column must match to any pattern provided in the validity table             |
+| \##!!eachpattern   | Every pattern provided in the validity table must match at least once to any value in the check table |
 
 ``` r
 validity_table <- data.frame("name" = c("##!!text", "##!!notempty", "##!!rowpattern", "Mazda", "Datsun", "Hornet"), 
@@ -183,11 +184,13 @@ downstream code:
 library(xafty)
 
 # Load example data
-data <- read_example_data() 
-check_table <- data$check_table
-validity_table <- data$validity_table
+data(Indometh)
+check_table <- Indometh
+validity_table <- data.frame("Subject" = c("##!!factor", "##!!notempty", "##!!strictexact", 1:6),
+                             "time" = c("##!!number", "##!!notempty", "##!!anypattern", "8", rep(NA, 5)),
+                             "conc" = c("##!!number", "##!!notempty", rep(NA, 7)))
 
-# Aligns the data type of the check table as specified in the validity table
+# Aligns the data type of the check table as specified data types in the validity table
 check_table <- align_column_types(check_table = check_table, validity_table = validity_table)
 
 # Check if the check table is valid

diff --git a/man/as.POSIXct_xafty.Rd b/man/as.POSIXct_xafty.Rd
diff --git a/man/check_column_number.Rd b/man/check_column_number.Rd
diff --git a/man/filter_column_empty.Rd b/man/filter_column_empty.Rd
diff --git a/tests/testthat/test-check_column_number.R b/tests/testthat/test-check_column_number.R
@@ -23,3 +23,41 @@ test_that("Incorrect number of columns is correctly detected", {
   expect_equal(checK_result$Columns, NA)
 
 })
+
+test_that("Correct number of columns is correctly detected", {
+
+  check_table <- data.frame("Name" = c("David", "Diana", "Marcel"),
+                            "Age" = c(22, 18, 25))
+  validity_table <- data.frame("Name" = "##!!text", "Age" = "##!!number")
+
+  checK_result <- check_column_number(check_table = check_table, validity_table = validity_table, simply = TRUE)
+
+  expect_true(checK_result)
+
+})
+
+test_that("Incorrect number of columns is correctly detected", {
+
+  check_table <- data.frame("Name" = c("David", "Diana", "Marcel"),
+                            "Age" = c(22, 18, 25))
+  validity_table <- data.frame("Name" = "##!!text")
+
+  checK_result <- check_column_number(check_table = check_table, validity_table = validity_table, simply = TRUE)
+
+  expect_false(checK_result)
+
+})
+
+test_that("Parameter 'larger' works as expected", {
+
+  check_table <- data.frame("Name" = c("David", "Diana", "Marcel"),
+                            "Age" = c(22, 18, 25),
+                            "Nickname" = c("Davy", "Bogini", "Marcello"))
+  validity_table <- data.frame("Name" = "##!!text", "Age" = "##!!number")
+
+  checK_result <- check_column_number(check_table = check_table, validity_table = validity_table, check_type = "larger")
+
+  expect_true(checK_result$Check_Result)
+  expect_equal(checK_result$Columns, NA)
+
+})
diff --git a/tests/testthat/test-filter_column_patterninput.R b/tests/testthat/test-filter_column_patterninput.R
@@ -18,7 +18,7 @@ test_that("Filter for values are correctly identified", {
   check_table <- data.frame("Name" = as.character(c("David", "Diana", "Marcel")),
                             "Has_Birthday" = factor(c("no", "maybe", "no")))
 
-  validity_table <- data.frame("Name" = c("##!!text", "##!!strictpattern", "a", NA),
+  validity_table <- data.frame("Name" = c("##!!text", "##!!strictpattern", "a", "i"),
                                "Has_Birthday" = c("##!!factor", "##!!strictpattern", "n", "o"))
 
   check_result <- filter_column_patterninput(check_table = check_table, validity_table = validity_table, filter_column = "Has_Birthday")