Skip to content

Commit

Permalink
Fix time data types in contexts getting reformatted (#88)
Browse files Browse the repository at this point in the history
- Fixes #49
- Add test for substituting a time value
- Update context categories for `metadata_add_contexts`
- Convert context values to character type, preventing time data types from converting weirdly when converted to yaml in `write_metadata`
- Remove "time" values from `dataset_test` checks of allowable categorical values
- Add a note to the user when time data types get reformatted
- Partial solution to #87 where substitutions for time variables (e.g. `flowering_time`, `fruiting_time`) fail `dataset_test`
  • Loading branch information
yangsophieee authored Oct 12, 2023
1 parent 2ac30e0 commit 96b6abc
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 20 deletions.
18 changes: 10 additions & 8 deletions R/process.R
Original file line number Diff line number Diff line change
Expand Up @@ -687,9 +687,9 @@ process_create_context_ids <- function(data, contexts) {
dplyr::select(dplyr::all_of(c("context_property", "category", "value"))) %>%
dplyr::distinct()

categories <- c(
"plot_context", "treatment_context", "entity_context",
"temporal_context", "method_context") %>%
categories <-
c("plot_context", "treatment_context", "entity_context",
"temporal_context", "method_context") %>%
subset(., . %in% tmp$category)

ids <- dplyr::tibble(.rows = nrow(context_cols))
Expand Down Expand Up @@ -1031,12 +1031,12 @@ process_flag_unsupported_values <- function(data, definitions) {
ii <- data[["trait_name"]] == trait

# Only Y,N
i <- ii & is.na(data[["error"]]) & !grepl("^[YyNn]+$", data[["value"]])
i <- ii & is.na(data[["error"]]) & !grepl("^[YyNn]+$", data[["value"]])
data <- data %>%
dplyr::mutate(error = ifelse(i, "Time can only contain Y & Ns", .data$error))

# Must be length 12
i <- ii & is.na(data[["error"]]) & stringr::str_length(data[["value"]]) != 12
i <- ii & is.na(data[["error"]]) & stringr::str_length(data[["value"]]) != 12
data <- data %>%
dplyr::mutate(error = ifelse(i, "Times must be length 12", .data$error))
}
Expand Down Expand Up @@ -1482,15 +1482,17 @@ process_parse_data <- function(data, dataset_id, metadata, contexts, schema) {

# Implement any value changes as per substitutions
if (!is.na(metadata[["substitutions"]][1])) {
substitutions_table <- util_list_to_df2(metadata[["substitutions"]]) %>%
substitutions_table <- util_list_to_df2(metadata[["substitutions"]]) %>%
dplyr::mutate(
find = tolower(.data$find),
replace = tolower(.data$replace)
)

for (i in seq_len(nrow(substitutions_table))) {
j <- which(out[["trait_name"]] == substitutions_table[["trait_name"]][i] &
out[["value"]] == substitutions_table[["find"]][i])
j <- which(
out[["trait_name"]] == substitutions_table[["trait_name"]][i] &
out[["value"]] == substitutions_table[["find"]][i]
)

if (length(j) > 0) {
out[["value"]][j] <- substitutions_table[["replace"]][i]
Expand Down
6 changes: 4 additions & 2 deletions R/setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -470,14 +470,16 @@ metadata_add_contexts <- function(dataset_id, overwrite = FALSE, user_responses

var_in <- metadata_user_select_names(
paste("Indicate all columns that contain additional contextual data for ", dataset_id), v)
categories <- c("treatment", "plot", "temporal", "method", "entity_context")
categories <-
c("treatment_context", "plot_context", "temporal_context",
"method_context", "entity_context")

for (i in seq_along(var_in)) {

ii <- n_existing + i
category <- metadata_user_select_names(
paste("What category does context", var_in[i], "fit in?"), categories)
context_values <- data[[var_in[i]]] %>% unique() %>% na.omit()
context_values <- data[[var_in[i]]] %>% unique() %>% na.omit() %>% as.character()

message(sprintf("\tThe following values exist for this context: %s", context_values %>% paste(collapse = ", ")))

Expand Down
18 changes: 9 additions & 9 deletions R/testdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -598,12 +598,13 @@ dataset_test_worker <-
# Look for context values in `find` column
i <- v %in% contextsub[["find"]]

expect_true(all(i),
info = paste0(
f,
" - context names from data file not present in metadata contexts: ",
v[!i]
)
expect_true(
all(i),
info = ifelse(
"hms" %in% class(v),
sprintf("%s - context names from data file not present in metadata contexts: %s\n\n'%s' has been detected as a time data type and reformatted\n\t-> Please make sure context metadata matches reformatting", f, v[!i], j),
sprintf("%s - context names from data file not present in metadata contexts: %s", f, v[!i])
)
)
}
}
Expand Down Expand Up @@ -661,9 +662,8 @@ dataset_test_worker <-
if (!is.null(definitions$elements[[trait]]) &&
definitions$elements[[trait]]$type == "categorical") {
to_check <- x[[trait]]$replace %>% unique()
allowable <-
c(definitions$elements[[trait]]$allowed_values_levels %>% names(),
NA)
to_check <- to_check[!(grepl("^[YyNn]+$", to_check) & stringr::str_length(to_check) == 12)]
allowable <- c(definitions$elements[[trait]]$allowed_values_levels %>% names(), NA)
failing <- to_check[!(
is.na(to_check) |
to_check %in% allowable |
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/examples/Test_2023_1/data.csv
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Melicope elleryana,melell,Atherton,0.075,0.0268,6955,0.346,0.58,1.61,,4201,2860,
Neolitsea dealbata,neodea,Atherton,0.093,0.0164,5228,0.352,0.58,176.1,1686,2747,3049,7504,,,,geophyte,,bottom slope,,9:00:00,instrument 3,,,4,cm,,,,,,
Polyscias australiana,polaus,Atherton,0.079,0.0154,6806,0.397,0.42,8.35,,6008,,6700,,,nnnnnyyyynnnnnnnnnn,tree,female,dune crest,,9:00:00,instrument 3,,4,4,cm,,,,,,
Psychotria sp Utchee Creek,psyspp,Atherton,0.108,0.0179,11157,0.582,0,23.09,,4977,2302,2037,,,,trees,female,dune crest,,,,10,6,4,cm,,,,,,
Rhodomyrtus trineura,rhotri,Atherton,0.129,0.0111,3401,0.763,0.34,,4321,3803,6088,6877,,,,tree,male,,,,,,9,4,cm,,,,,,
Rhodomyrtus trineura,rhotri,Atherton,0.129,0.0111,3401,0.763,0.34,,4321,3803,6088,6877,,,7:00:00,tree,male,,,,,,9,4,cm,,,,,,
Acmena graveolens,acmgra,Cape Tribulation,0.151,0.0155,5246,0.599,0.67,,,3057,2667,7156,,,,,male,,,,,1,10,4,cm,,,,,,
Aleurites rockinghamensis,aleroc,Cape Tribulation,0.113,0.0184,73984,0.28,0.02,7077,,4194,,6209,,,summer,,male,,,,,1,2,4,cm,,,,,,
Alstonia scholaris,alssch,Cape Tribulation,0.107,0.0223,6182,0.361,0.46,1.53,,1854,1682,3932,,,not observed,herbs,male,,,,,1,2,4,cm,,,,,,
Expand Down
3 changes: 3 additions & 0 deletions tests/testthat/examples/Test_2023_1/metadata.yml
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,9 @@ substitutions:
- trait_name: flowering_time
find: not observed
replace: .na
- trait_name: flowering_time
find: 7:00:00
replace: nnnnnnnnnnny
taxonomic_updates:
- find: Homalanthus novoguineensis
replace: Homalanthus novo-guineensis
Expand Down
1 change: 1 addition & 0 deletions tests/testthat/examples/Test_2023_1/output/traits.csv
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ Test_2023_1,Quassia baileyana,130,leaf_N_per_dry_mass,23,mg/g,population,mean,me
Test_2023_1,Quassia baileyana,130,leaf_area,9624,mm2,population,mean,measurement,3,field,adult,04,,,,,02,,,,2004/2004,from Falster_2005_1,01,,Quassia baileyana
Test_2023_1,Quassia baileyana,130,leaf_mass_per_area,86,g/m2,population,mean,measurement,3,field,adult,04,,,,,02,,,,2004/2004,from Falster_2005_1,01,,Quassia baileyana
Test_2023_1,Quassia baileyana,130,wood_density,0.501,mg/mm3,population,mean,measurement,3,field,adult,04,,,,,02,,,,2004/2004,from Falster_2005_1,01,,Quassia baileyana
Test_2023_1,Rhodomyrtus trineura,132,flowering_time,nnnnnnnnnnny,,individual,mode,expert_score,3,field,adult,02,,,,,01,15,,,2004/2004,from Falster_2005_1,01,,Rhodomyrtus trineura
Test_2023_1,Rhodomyrtus trineura,132,huber_value,0.00014541224371092,mm2{sapwood}/mm2{leaf},individual,raw,model_derived,5,field,adult,02,,,,,01,15,,,2002/2002,made-up measurement remarks,04,01,Rhodomyrtus trineura
Test_2023_1,Rhodomyrtus trineura,132,huber_value,0.00014541224371092,mm2{sapwood}/mm2{leaf},individual,mean,model_derived,5,field,adult,02,,,,,01,15,,,2002/2002,made-up measurement remarks,05,,Rhodomyrtus trineura
Test_2023_1,Rhodomyrtus trineura,132,leaf_length,40,mm,individual,mean,measurement,3,field,adult,02,,,,,01,15,,,2004/2004,from Falster_2005_1,01,,Rhodomyrtus trineura
Expand Down

0 comments on commit 96b6abc

Please sign in to comment.