Skip to content

Commit

Permalink
Added new functionality to align column types to handle forced NA con…
Browse files Browse the repository at this point in the history
…version more gracefully; increased version number to 1.4.0
  • Loading branch information
The Rational Optimist committed Dec 30, 2023
1 parent 9fbe14e commit fbf42fd
Show file tree
Hide file tree
Showing 12 changed files with 294 additions and 40 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: xafty
Title: Lightweight package to easily validate a table
Version: 1.3.0
Version: 1.4.0
Author: David Crone
Maintainer: The package maintainer <davidjvcrone@gmail.com>
Description: The package is designed to facilitate task automation for
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export(add_regex_columns_to_validity)
export(align_column_types)
export(as.Date_xafty)
export(as.POSIXct_xafty)
export(as.numeric_xafty)
export(build_xafty_list)
export(build_xafty_test_table)
export(check_column_exactinput)
Expand Down
22 changes: 16 additions & 6 deletions R/check_column_types.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,21 @@ check_column_types <- function(check_table, validity_table, simply = FALSE) {
#' @param validity_table Data table. A validity table that holds the class information for alignment
#' @param date_origin Character. A date string for date conversion giving the number of days since e.g. "1900-01-01". This
#' is only necessary if the excel date is stored as numeric: (32768, 35981). For more information see: ?as.Date
#' @param tryFormats Character vector. Date formats that should be use to try to convert to date
#' @param tryFormats_Date Character vector. Date formats that should be use to try to convert to date
#' @param tryFormats_POSIXct Character vector. POSIXct formats that should be use to try to convert to POSIXct
#' @param tz Timezone for the POSIXct values. Default is UTC
#' @param force_type Boolean. Whether to force the type conversion even if it introduces NAs during type conversion. If TRUE
#' the function keeps the column as is.
#' @export
align_column_types <- function(check_table, validity_table,
align_column_types <- function(check_table, validity_table, force_type = TRUE,
date_origin = "1899-12-30",
tryFormats = c("%d.%m.%Y", "%d/%m/%Y", "%Y-%m-%d", "%Y/%m/%d"),
tryFormats_Date = c("%d.%m.%Y", "%d/%m/%Y", "%Y-%m-%d", "%Y/%m/%d"),
tryFormats_POSIXct = c("%Y-%m-%d %H:%M:%OS",
"%Y/%m/%d %H:%M:%OS",
"%Y-%m-%d %H:%M",
"%Y/%m/%d %H:%M",
"%Y-%m-%d",
"%Y/%m/%d"),
tz = "") {
xafty_syntax <- "##!!"
possible_classes <- c("text", "date", "number", "factor", "datetime")
Expand All @@ -102,11 +111,12 @@ align_column_types <- function(check_table, validity_table,
switch(xafty_data_type,
"##!!text" = check_table[, i] <- as.character(check_table[[i]]),
"##!!date" = check_table[, i] <- as.Date_xafty(check_table[[i]],
date_origin = date_origin, tryFormats = tryFormats
force_type = force_type, date_origin = date_origin, tryFormats = tryFormats_Date
),
"##!!number" = check_table[, i] <- as.numeric(check_table[[i]]),
"##!!number" = check_table[, i] <- as.numeric_xafty(check_table[[i]], force_type = force_type),
"##!!factor" = check_table[, i] <- as.factor(check_table[[i]]),
"##!!datetime" = check_table[, i] <- as.POSIXct_xafty(check_table[[i]], tz = tz)
"##!!datetime" = check_table[, i] <- as.POSIXct_xafty(check_table[[i]], force_type = force_type,
tryFormats = tryFormats_POSIXct, tz = tz)
)
}
}
Expand Down
160 changes: 130 additions & 30 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -184,28 +184,65 @@ is.Date_xafty <- function(dates, date_origin = "1899-12-30", tryFormats = c("%Y-
#' @param dates Character vector of Dates to be Converted
#' @param date_origin Character. The date from which numeric dates will be converted into ISO-Date format
#' @param tryFormats Character vector. Date formats that should be use to try to convert to date
#' @param force_type Boolean. Whether to force the type conversion even if it introduces NAs during type conversion. If TRUE
#' the function keeps the column as is.
#' @return An equally length date vector, \code{NA} when the value could not be converted to date
#' @export
as.Date_xafty <- function(dates, date_origin = "1899-12-30", tryFormats = c("%Y-%m-%d", "%d.%m.%Y", "%d/%m/%Y", "%Y/%m/%d")) {
xafty_column <- sapply(dates, \(date) {
tryCatch(
{
as.Date(date, tryFormats = tryFormats)
},
error = function(e) {
numeric_date <- suppressWarnings(as.numeric(date))
if (is.na(numeric_date)) {
return(NA)
as.Date_xafty <- function(dates, date_origin = "1899-12-30",
force_type = TRUE, tryFormats = c("%Y-%m-%d", "%d.%m.%Y", "%d/%m/%Y", "%Y/%m/%d")) {

if (force_type) {
xafty_column <- sapply(dates, \(date) {
tryCatch(
{
as.Date(date, tryFormats = tryFormats)
},
error = function(e) {
numeric_date <- suppressWarnings(as.numeric(date))
if (is.na(numeric_date)) {
return(NA)
}

as.Date(numeric_date, origin = date_origin)
}
)
})

as.Date(numeric_date, origin = date_origin)
}
)
})
names(xafty_column) <- NULL

as.Date(xafty_column, origin = "1970-01-01")

} else {

na_position <- which(is.na(dates))
xafty_column <- sapply(dates, \(date) {
tryCatch(
{
as.Date(date, tryFormats = tryFormats)
},
error = function(e) {
numeric_date <- suppressWarnings(as.numeric(date))
if (is.na(numeric_date)) {
return(NA)
}

as.Date(numeric_date, origin = date_origin)
}
)
})

na_position_after <- which(is.na(xafty_column))

if(sum(na_position_after) > sum(na_position)) {
return(dates)
} else {
names(xafty_column) <- NULL
as.Date(xafty_column, origin = "1970-01-01")
}

}

names(xafty_column) <- NULL

as.Date(xafty_column, origin = "1970-01-01")
}

#' @title Check if Passed Values can be Parsed as Numeric
Expand All @@ -225,6 +262,26 @@ is.numeric_xafty <- function(numbers) {
xafty_column
}

#' @title Check if Passed Values can be Parsed as Numeric
#' @param numbers Character vector of Numbers to be converted
#' @param force_type Boolean. Whether to force the type conversion even if it introduces NAs during type conversion. If TRUE
#' the function keeps the column as is.
#' @return An equally length numeric vector if the numerics could be successfully converted to NA.
#' @export
as.numeric_xafty <- function(numbers, force_type = TRUE) {

position_na <- which(is.na(numbers))

position_na_conversion <- which(is.na(suppressWarnings(as.numeric(numbers))))

if(sum(position_na_conversion) > sum(position_na) & !force_type) {
return(numbers)
} else {
as.numeric(numbers)
}

}

#' @title Check if Passed Values can be Parsed as POSIXct
#' @param datetimes Character vector of date time values to be parsed
#' @param tz Timezone for the POSIXct values. Default is UTC
Expand Down Expand Up @@ -252,22 +309,59 @@ is.POSIXct_xafty <- function(datetimes, tz = "") {
#' @title Convert Passed Values to POSIXct
#' @param datetimes Character vector of date time values to be parsed
#' @param tz Timezone for the POSIXct values. Default is UTC
#' @param force_type Boolean. Whether to force the type conversion even if it introduces NAs during type conversion. If TRUE
#' the function keeps the column as is.
#' @param tryFormats Character vector. POSIXct formats that should be use to try to convert to POSIXct
#' the function keeps the column as is
#' @export
as.POSIXct_xafty <- function(datetimes, tz = "") {
xafty_column <- sapply(datetimes, \(datetime) {
tryCatch(
{
as.POSIXct(datetime, tz = tz)
},
error = function(e) {
NA
}
)
})
as.POSIXct_xafty <- function(datetimes, force_type = TRUE,
tryFormats = c("%Y-%m-%d %H:%M:%OS",
"%Y/%m/%d %H:%M:%OS",
"%Y-%m-%d %H:%M",
"%Y/%m/%d %H:%M",
"%Y-%m-%d",
"%Y/%m/%d"),
tz = "") {

if (force_type) {
xafty_column <- sapply(datetimes, \(datetime) {
tryCatch(
{
as.POSIXct(datetime, tz = tz)
},
error = function(e) {
NA
}
)
})

names(xafty_column) <- NULL
names(xafty_column) <- NULL
as.POSIXct(xafty_column, tryFormats = tryFormats)

} else {
na_position <- which(is.na(datetimes))

xafty_column <- sapply(datetimes, \(datetime) {
tryCatch(
{
as.POSIXct(datetime, tz = tz)
},
error = function(e) {
NA
}
)
})

na_position_after <- which(is.na(xafty_column))

if(sum(na_position_after) > sum(na_position)) {
return(datetimes)
} else {
names(xafty_column) <- NULL
as.POSIXct(xafty_column, tryFormats = tryFormats)
}
}

as.POSIXct(xafty_column)
}

#' @title Build a List of Test Results
Expand All @@ -277,6 +371,7 @@ as.POSIXct_xafty <- function(datetimes, tz = "") {
#'
#' @param check_table Data Frame. The table that will be checked against the specified rules in the validity table.
#' @param validity_table Data Frame. A validation table that stores the rules that the check table will be checked against.
#' @param align_columns Boolean. Whether to align column types before checking for xafty rules. Does not coerce values to NA!
#' @param meta_tests_name Character. Name of the list item that stores all meta_tests. The parameter is there to help avoid naming
#' conflicts with column names from the check table or validity table.
#' @param check_names Boolean. Adds a meta test that checks whether all column names of the validity table are present in
Expand All @@ -285,12 +380,17 @@ as.POSIXct_xafty <- function(datetimes, tz = "") {
#' larger than the columns in the validity table.
#' @returns A list.
#' @export
build_xafty_list <- function(check_table, validity_table,
build_xafty_list <- function(check_table, validity_table, align_columns = TRUE,
meta_tests_name = "meta_tests", check_names = TRUE, check_number = TRUE) {

validity_table <- add_regex_columns_to_validity(check_table = check_table, validity_table = validity_table,
multiple = "remove")

if(align_columns) {
check_table <- align_column_types(check_table = check_table, validity_table = validity_table,
force_type = FALSE)
}

base_column_list <- list()

if (check_names || check_number) {
Expand Down
1 change: 1 addition & 0 deletions R/xafty_rules.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#' \item{description}{A character vector of rule description in prosa.}
#' \item{check_function}{A list of functions for check purposes.}
#' \item{filter_function}{A list of functions to filter values.}
#' \item{change_type_function}{A list of functions change data to appropriate types.}
#' }
#'
#' @source The data is curated by xafty developers and grows as new rules are added.
Expand Down
12 changes: 10 additions & 2 deletions man/align_column_types.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/as.Date_xafty.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 13 additions & 1 deletion man/as.POSIXct_xafty.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/as.numeric_xafty.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/build_xafty_list.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/xafty_rules_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit fbf42fd

Please sign in to comment.