Skip to content

Commit

Permalink
added new simply argument to column number test; exported filter_empt…
Browse files Browse the repository at this point in the history
…y function; made readme more comprehensible
  • Loading branch information
The Rational Optimist committed Oct 28, 2023
1 parent c259ed2 commit 38393ad
Show file tree
Hide file tree
Showing 12 changed files with 128 additions and 40 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: xafty
Title: Lightweight package to validate a table
Version: 1.0.0
Title: Lightweight package to easily validate a table
Version: 1.1.0
Author: David Crone
Maintainer: The package maintainer <davidjvcrone@gmail.com>
Description: The package is designed to facilitate task automation for
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ export(check_column_notempty)
export(check_column_number)
export(check_column_types)
export(check_validity)
export(filter_column_empty)
export(filter_column_exactinput)
export(filter_column_patterninput)
export(filter_column_type)
export(read_example_data)
importFrom(readxl,read_xlsx)
importFrom(stats,setNames)
importFrom(utils,read.csv)
importFrom(utils,read.csv2)
21 changes: 15 additions & 6 deletions R/check_column_number.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
#' \item "equal": If number of columns in both tables should be equal
#' \item "larger": If number of columns in both tables should be equal or larger
#' }
#' @param simply Boolean. Changes the return value of the function to TRUE or FALSE, whether the test passes.
#' @return A data.frame if simply is FALSE and a Boolean of length 1 if simply is TRUE
#' @export
check_column_number <- function(check_table, validity_table, check_type = "equal") {
check_column_number <- function(check_table, validity_table, check_type = "equal", simply = FALSE) {

n_cols_check_table <- ncol(check_table)
n_cols_validity_table <- ncol(validity_table)
Expand All @@ -18,23 +20,30 @@ check_column_number <- function(check_table, validity_table, check_type = "equal

result <- n_cols_check_table >= n_cols_validity_table

message_appended <- "Table must have at least"

} else {

result <- n_cols_check_table == n_cols_validity_table

message_appended <- "Table must have exactly"

}

if(simply) {

if(result) {
return(result)

message <- paste("ALL GOOD!")
} else if (result) {

message <- paste("ALL GOOD!")

} else {

message <- paste("Rule Broken: Wrong number of columns! Table should have", n_cols_validity_table, "column(s) but has",
n_cols_check_table, "columns.", sep = " ")
message <- paste("Rule Broken: Wrong number of columns!", message_appended, n_cols_validity_table, "column(s) but has",
n_cols_check_table, "columns.", sep = " ")

}
}

data.frame("Check" = "Column Number", "Check_Result" = result, "Message" = message, "Columns" = NA)

Expand Down
7 changes: 6 additions & 1 deletion R/filter_column_empty.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@


#' @title Filter Values in Column That are NA
#' @param check_table Data Frame. The table that will be checked against the specified rules in the validity table.
#' @param filter_column Character. The column which will be checked. The column must be present in both the check table
#' as well as the validity table
#' @return A logical vector.
#' @export
filter_column_empty <- function(check_table, filter_column) {

stopifnot(length(filter_column) == 1 & is.character(filter_column))
Expand Down
4 changes: 2 additions & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
#' @param path_validity Path to the validity table that stores the accompanying xafty rules
#' @param file_ending Character. Valid input: "xlsx", "csv_comma", "csv_semicolon". You need to make sure that the file ending
#' matches the actual file.
#' @importFrom readxl read_xlsx
#' @importFrom utils read.csv read.csv2
#' @export
read_example_data <- function(path_check = "inst/extdata/example_data.xlsx",
path_validity = "inst/extdata/example_validity.csv", file_ending = "xlsx") {

Expand Down Expand Up @@ -269,7 +269,7 @@ is.POSIXct_xafty <- function(datetimes, tz = "") {

}

#' @title Convert Passed Values as POSIXct
#' @title Convert Passed Values to POSIXct
#' @param datetimes Character vector of date time values to be parsed
#' @param tz Timezone for the POSIXct values. Default is UTC
as.POSIXct_xafty <- function(datetimes, tz = "") {
Expand Down
22 changes: 12 additions & 10 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ remotes::install_github("davidcrone/xafty")

The check table contains the data that will be checked. Usually, it is data read in from a spreadsheet.

In order to explain all the possible rules and their variants, we will use a reduced 'mtcars' data set as an example.
In order to introduce all possible rules and their variants, we will use a reduced 'mtcars' data set as an example.

``` r
data(mtcars)
Expand Down Expand Up @@ -154,16 +154,16 @@ check_column_exactinput(check_table = mtcars, validity_table = validity_table)

### Check for Pattern Values

Checks whether the provided patterns in the validity table match the data in the check table. Several variant rules exist for this check.
Checks whether the provided patterns in a column of the validity table matches to the corresponding column in the check table. Several variant rules exist for the pattern check.

**Rule syntax:**

| Rule | Explanation |
|------------------|--------------------------------------------------------------------------------------------|
| ##!!strictpattern| All values in the check table must match the provided patterns in the validity table |
| ##!!rowpattern | Every row of the column in the check table must match to at least to one pattern in the validity table |
| ##!!anypattern | At least one value in the check table must match to a pattern in the validity table |
| ##!!eachpattern | Every pattern provided in the validity table must match at least once in the check table |
| ##!!strictpattern| All values in the column must match to every pattern provided in the validity table |
| ##!!rowpattern | All values of the column must match at least to one pattern in the validity table |
| ##!!anypattern | At least one value in the column must match to any pattern provided in the validity table |
| ##!!eachpattern | Every pattern provided in the validity table must match at least once to any value in the check table |

``` r
validity_table <- data.frame("name" = c("##!!text", "##!!notempty", "##!!rowpattern", "Mazda", "Datsun", "Hornet"),
Expand All @@ -180,11 +180,13 @@ Basic workflow to check a table for its 'validity' before using it in downstream
library(xafty)

# Load example data
data <- read_example_data()
check_table <- data$check_table
validity_table <- data$validity_table
data(Indometh)
check_table <- Indometh
validity_table <- data.frame("Subject" = c("##!!factor", "##!!notempty", "##!!strictexact", 1:6),
"time" = c("##!!number", "##!!notempty", "##!!anypattern", "8", rep(NA, 5)),
"conc" = c("##!!number", "##!!notempty", rep(NA, 7)))

# Aligns the data type of the check table as specified in the validity table
# Aligns the data type of the check table as specified data types in the validity table
check_table <- align_column_types(check_table = check_table, validity_table = validity_table)

# Check if the check table is valid
Expand Down
31 changes: 17 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ remotes::install_github("davidcrone/xafty")
The check table contains the data that will be checked. Usually, it is
data read in from a spreadsheet.

In order to explain all the possible rules and their variants, we will
use a reduced ‘mtcars’ data set as an example.
In order to introduce all possible rules and their variants, we will use
a reduced ‘mtcars’ data set as an example.

``` r
data(mtcars)
Expand Down Expand Up @@ -155,17 +155,18 @@ check_column_exactinput(check_table = mtcars, validity_table = validity_table)

### Check for Pattern Values

Checks whether the provided patterns in the validity table match the
data in the check table. Several variant rules exist for this check.
Checks whether the provided patterns in a column of the validity table
matches to the corresponding column in the check table. Several variant
rules exist for the pattern check.

**Rule syntax:**

| Rule | Explanation |
|--------------------|--------------------------------------------------------------------------------------------------------|
| \##!!strictpattern | All values in the check table must match the provided patterns in the validity table |
| \##!!rowpattern | Every row of the column in the check table must match to at least to one pattern in the validity table |
| \##!!anypattern | At least one value in the check table must match to a pattern in the validity table |
| \##!!eachpattern | Every pattern provided in the validity table must match at least once in the check table |
| Rule | Explanation |
|--------------------|-------------------------------------------------------------------------------------------------------|
| \##!!strictpattern | All values in the column must match to every pattern provided in the validity table |
| \##!!rowpattern | All values of the column must match at least to one pattern in the validity table |
| \##!!anypattern | At least one value in the column must match to any pattern provided in the validity table |
| \##!!eachpattern | Every pattern provided in the validity table must match at least once to any value in the check table |

``` r
validity_table <- data.frame("name" = c("##!!text", "##!!notempty", "##!!rowpattern", "Mazda", "Datsun", "Hornet"),
Expand All @@ -183,11 +184,13 @@ downstream code:
library(xafty)

# Load example data
data <- read_example_data()
check_table <- data$check_table
validity_table <- data$validity_table
data(Indometh)
check_table <- Indometh
validity_table <- data.frame("Subject" = c("##!!factor", "##!!notempty", "##!!strictexact", 1:6),
"time" = c("##!!number", "##!!notempty", "##!!anypattern", "8", rep(NA, 5)),
"conc" = c("##!!number", "##!!notempty", rep(NA, 7)))

# Aligns the data type of the check table as specified in the validity table
# Aligns the data type of the check table as specified data types in the validity table
check_table <- align_column_types(check_table = check_table, validity_table = validity_table)

# Check if the check table is valid
Expand Down
4 changes: 2 additions & 2 deletions man/as.POSIXct_xafty.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion man/check_column_number.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/filter_column_empty.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions tests/testthat/test-check_column_number.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,41 @@ test_that("Incorrect number of columns is correctly detected", {
expect_equal(checK_result$Columns, NA)

})

test_that("Correct number of columns is correctly detected", {

check_table <- data.frame("Name" = c("David", "Diana", "Marcel"),
"Age" = c(22, 18, 25))
validity_table <- data.frame("Name" = "##!!text", "Age" = "##!!number")

checK_result <- check_column_number(check_table = check_table, validity_table = validity_table, simply = TRUE)

expect_true(checK_result)

})

test_that("Incorrect number of columns is correctly detected", {

check_table <- data.frame("Name" = c("David", "Diana", "Marcel"),
"Age" = c(22, 18, 25))
validity_table <- data.frame("Name" = "##!!text")

checK_result <- check_column_number(check_table = check_table, validity_table = validity_table, simply = TRUE)

expect_false(checK_result)

})

test_that("Parameter 'larger' works as expected", {

check_table <- data.frame("Name" = c("David", "Diana", "Marcel"),
"Age" = c(22, 18, 25),
"Nickname" = c("Davy", "Bogini", "Marcello"))
validity_table <- data.frame("Name" = "##!!text", "Age" = "##!!number")

checK_result <- check_column_number(check_table = check_table, validity_table = validity_table, check_type = "larger")

expect_true(checK_result$Check_Result)
expect_equal(checK_result$Columns, NA)

})
2 changes: 1 addition & 1 deletion tests/testthat/test-filter_column_patterninput.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ test_that("Filter for values are correctly identified", {
check_table <- data.frame("Name" = as.character(c("David", "Diana", "Marcel")),
"Has_Birthday" = factor(c("no", "maybe", "no")))

validity_table <- data.frame("Name" = c("##!!text", "##!!strictpattern", "a", NA),
validity_table <- data.frame("Name" = c("##!!text", "##!!strictpattern", "a", "i"),
"Has_Birthday" = c("##!!factor", "##!!strictpattern", "n", "o"))

check_result <- filter_column_patterninput(check_table = check_table, validity_table = validity_table, filter_column = "Has_Birthday")
Expand Down

0 comments on commit 38393ad

Please sign in to comment.