Skip to content

Commit

Permalink
Merge pull request #210 from njtierney/nabular
Browse files Browse the repository at this point in the history
implement the nabular class, fixes #192
  • Loading branch information
njtierney authored Sep 3, 2018
2 parents 682c27f + 2b9339f commit 885de6f
Show file tree
Hide file tree
Showing 25 changed files with 232 additions and 25 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: naniar
Type: Package
Title: Data Structures, Summaries, and Visualisations for Missing Data
Version: 0.3.10.9000
Version: 0.3.10.9100
Authors@R: c(
person("Nicholas", "Tierney",
role = c("aut", "cre"),
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ export(all_na)
export(any_complete)
export(any_miss)
export(any_na)
export(any_shade)
export(are_na)
export(are_shade)
export(as_shadow)
Expand Down Expand Up @@ -73,6 +74,7 @@ export(impute_mean_all)
export(impute_mean_at)
export(impute_mean_if)
export(is_na)
export(is_nabular)
export(is_shade)
export(is_shadow)
export(label_miss_1d)
Expand Down Expand Up @@ -100,6 +102,7 @@ export(n_miss)
export(n_miss_row)
export(n_var_complete)
export(n_var_miss)
export(nabular)
export(new_nabular)
export(new_shadow)
export(pct_complete)
Expand Down
11 changes: 10 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
# naniar 0.3.10.9000 (2018/08/20)
# naniar 0.3.10.9100 (2018/09/03)

## New Feature

* `any_shade()` returns a logical TRUE or FALSE depending on if there are any `shade` values
* `nabular()` is an alias for `bind_shadow()`, added to tie the `nabular` term into the work.
* `is_nabular()` checks if input is nabular.


# naniar 0.3.10.9000 (2018/08/31)

## New Feature

Expand Down
28 changes: 26 additions & 2 deletions R/nabular.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,31 @@
#' @export
new_nabular <- function(x){

  # Constructor for the "nabular" class: validates that `x` mixes shade
  # columns with regular columns, then returns it as a tibble subclass.

  # Flag which columns of `x` are shade columns.
  # NOTE(review): assumes are_shade() returns one logical per column when
  # given a data frame - confirm against its definition.
  shade_cols <- are_shade(x)

  # nabular data must be a mixture of shade and regular data, so reject input
  # that is made up entirely of shade columns, or that has none at all.
  # The number of shade columns is summed first and *then* compared to
  # ncol(x); comparing each flag to ncol(x) before summing lets all-shade
  # data with more than one column slip through unnoticed.
  all_shade <- sum(shade_cols) == ncol(x)
  no_shade <- !any(shade_cols)

  # `||` because both operands are length-one logicals in an `if` condition
  if (all_shade || no_shade) {
    rlang::abort(message = "data must have shadow data with the regular data")
  }

  tibble::new_tibble(x, subclass = "nabular")
}

#' Convert data into nabular form by binding shade to it
#'
#' Binding a shadow matrix to a regular dataframe converts it into nabular data,
#' which makes it easier to visualise and work with missing data. `nabular()`
#' is an alias for [bind_shadow()]: it passes all of its arguments straight
#' through to that function.
#'
#' @param data a dataframe
#' @param only_miss logical - if FALSE (default) it will bind a dataframe with
#'   all of the variables duplicated with their shadow. Setting this to TRUE
#'   will bind only those variables that contain missing values.
#'   See the examples for more details.
#' @param ... extra options forwarded to [bind_shadow()], intended for
#'   [recode_shadow()] - a work in progress.
#'
#' @return data with the added variable shifted and the suffix `_NA`
#' @seealso [bind_shadow()]
#'
#' @examples
#' nabular(airquality)
#' nabular(airquality, only_miss = TRUE)
#'
#' @export
nabular <- function(data, only_miss = FALSE, ...){
bind_shadow(data = data, only_miss = only_miss, ...)
}
18 changes: 17 additions & 1 deletion R/shade.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,16 @@ new_shade <- function(x, extra_levels = NULL){
#' xs <- shade(c(NA, 1, 2, "3"))
#'
#' is_shade(xs)
#' are_shade(xs)
#' any_shade(xs)
#'
#' aq_s <- as_shadow(airquality)
#'
#' is_shade(aq_s)
#' are_shade(aq_s)
#' any_shade(aq_s)
#' any_shade(airquality)
#'
#'
is_shade <- function(x){
inherits(x, "shade")
Expand Down Expand Up @@ -86,7 +96,7 @@ shade <- function(x, ..., extra_levels = NULL){
test_if_null(x)

if (length(x) == 0) {
rlang::abort(msg = "input to shade must have length > 0")
rlang::abort(message = "input to shade must have length > 0")
}

# if no other levels are specified
Expand Down Expand Up @@ -132,3 +142,9 @@ shade <- function(x, ..., extra_levels = NULL){
# and return a new shade value
new_shade(x, extra_levels)
}

#' @export
#' @rdname is_shade
any_shade <- function(x){
  # one flag per element checked by are_shade(); TRUE if at least one is set
  shade_flags <- are_shade(x)
  any(shade_flags)
}
9 changes: 8 additions & 1 deletion R/shadow-verifiers.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,17 @@
#' is_shadow(aq_sh)
#' is_shadow(airquality)
#' is_shadow(aq_bind)
#' is_nabular(aq_bind)
#'
#' @export
#' @name is_shadow

is_shadow <- function(x){
  # TRUE when "shadow" is one of the classes `x` inherits from
  has_shadow_class <- inherits(x, what = "shadow", which = FALSE)
  has_shadow_class
}

#' @export
#' @rdname is_shadow
is_nabular <- function(x){
  # TRUE when "nabular" is one of the classes `x` inherits from
  has_nabular_class <- inherits(x, what = "nabular", which = FALSE)
  has_nabular_class
}

6 changes: 4 additions & 2 deletions R/shadows.R
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ bind_shadow <- function(data, only_miss = FALSE, ...){

# class(shadow_data) <- c("shadow", class(shadow_data))

return(new_shadow(shadow_data))
# return(new_shadow(shadow_data))
return(new_nabular(shadow_data))

# if you want All the values to be added (the default behaviour)
}
Expand All @@ -150,7 +151,8 @@ bind_shadow <- function(data, only_miss = FALSE, ...){

# class(shadow_data) <- c("shadow", class(shadow_data))

return(new_shadow(shadow_data))
# return(new_shadow(shadow_data))
return(new_nabular(shadow_data))

}

Expand Down
18 changes: 11 additions & 7 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ knitr::opts_chunk$set(
`naniar` provides principled, tidy ways to summarise, visualise, and manipulate missing data with minimal deviations from the workflows in ggplot2 and tidy data. It does this by providing:

- Shadow matrices, a tidy data structure for missing data:
- `as_shadow()` and `bind_shadow()`
- `bind_shadow()` and `nabular()`
- Shorthand summaries for missing data:
- `n_miss()` and `n_complete()`
- `pct_miss()`and `pct_complete()`
- Numerical summaries of missing data in variables and cases:
- `miss_var_summary()` and `miss_var_table()`
- `miss_case_summary()`, `miss_case_table()`
- Visualisation methods:
- Visualisation for missing data:
- `geom_miss_point()`
- `gg_miss_var()`
- `gg_miss_case()`
Expand All @@ -54,7 +54,7 @@ remotes::install_github("njtierney/naniar")

# A short overview of naniar

Visualising missing data might sound a little strange - how do you visualise something that is not there? One approach to visualising missing data comes from [ggobi](http://www.ggobi.org/) and [manet](https://www.swmath.org/software/3067), where we replace "NA" values with values 10% lower than the minimum value in that variable. This visualisation is provided with the `geom_miss_point()` ggplot2 geom - which we illustrate by exploring the relationship between Ozone and Solar radiation from the airquality dataset.
Visualising missing data might sound a little strange - how do you visualise something that is not there? One approach to visualising missing data comes from [ggobi](http://www.ggobi.org/) and [manet](https://www.swmath.org/software/3067), which replaces `NA` values with values 10% lower than the minimum value in that variable. This visualisation is provided with the `geom_miss_point()` ggplot2 geom - which we illustrate by exploring the relationship between Ozone and Solar radiation from the airquality dataset.

```{r regular-geom-point}
Expand All @@ -69,7 +69,7 @@ ggplot(data = airquality,

ggplot2 does not handle these missing values, and we get a warning message about the missing values.

We can instead use the `geom_miss_point()` to display the missing data
We can instead use `geom_miss_point()` to display the missing data

```{r geom-miss-point}
Expand Down Expand Up @@ -110,8 +110,14 @@ as_shadow(airquality)
```

Binding the shadow data to the data helps you keep better track of the missing values. This format is called "nabular", a portmanteau of `NA` and `tabular`. You can bind the shadow to the data using `bind_shadow` or `nabular`:

Using the shadow matrix helps you manage where missing values are in your dataset and make it easy to do visualisations where you split by missingness:
```{r show-nabular}
bind_shadow(airquality)
nabular(airquality)
```

Using the nabular format helps you manage where missing values are in your dataset and makes it easy to do visualisations where you split by missingness:

```{r shadow-w-ggplot}
Expand All @@ -125,7 +131,6 @@ airquality %>%

And even visualise imputations


```{r shadow-impute}
airquality %>%
Expand Down Expand Up @@ -165,7 +170,6 @@ gg_miss_span(pedestrian,
span_every = 1500)
```


You can read about all of the visualisations in naniar in the vignette [Gallery of missing data visualisations using naniar](http://naniar.njtierney.com/articles/naniar-visualisation.html).

naniar also provides handy helpers for calculating the number, proportion, and percentage of missing and complete observations:
Expand Down
50 changes: 45 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ manipulate missing data with minimal deviations from the workflows in
ggplot2 and tidy data. It does this by providing:

- Shadow matrices, a tidy data structure for missing data:
- `as_shadow()` and `bind_shadow()`
- `bind_shadow()` and `nabular()`
- Shorthand summaries for missing data:
- `n_miss()` and `n_complete()`
- `pct_miss()`and `pct_complete()`
- Numerical summaries of missing data in variables and cases:
- `miss_var_summary()` and `miss_var_table()`
- `miss_case_summary()`, `miss_case_table()`
- Visualisation methods:
- Visualisation for missing data:
- `geom_miss_point()`
- `gg_miss_var()`
- `gg_miss_case()`
Expand Down Expand Up @@ -61,7 +61,7 @@ remotes::install_github("njtierney/naniar")
Visualising missing data might sound a little strange - how do you
visualise something that is not there? One approach to visualising
missing data comes from [ggobi](http://www.ggobi.org/) and
[manet](https://www.swmath.org/software/3067), where we replace “NA”
[manet](https://www.swmath.org/software/3067), which replaces `NA`
values with values 10% lower than the minimum value in that variable.
This visualisation is provided with the `geom_miss_point()` ggplot2 geom
- which we illustrate by exploring the relationship between Ozone and
Expand All @@ -83,7 +83,7 @@ ggplot(data = airquality,
ggplot2 does not handle these missing values, and we get a warning
message about the missing values.

We can instead use the `geom_miss_point()` to display the missing data
We can instead use `geom_miss_point()` to display the missing data

``` r

Expand Down Expand Up @@ -156,7 +156,47 @@ as_shadow(airquality)
#> # ... with 143 more rows
```

Using the shadow matrix helps you manage where missing values are in
Binding the shadow data to the data helps you keep better track of the
missing values. This format is called “nabular”, a portmanteau of `NA`
and `tabular`. You can bind the shadow to the data using `bind_shadow`
or `nabular`:

``` r
bind_shadow(airquality)
#> # A tibble: 153 x 12
#> Ozone Solar.R Wind Temp Month Day Ozone_NA Solar.R_NA Wind_NA
#> <int> <int> <dbl> <int> <int> <int> <fct> <fct> <fct>
#> 1 41 190 7.4 67 5 1 !NA !NA !NA
#> 2 36 118 8 72 5 2 !NA !NA !NA
#> 3 12 149 12.6 74 5 3 !NA !NA !NA
#> 4 18 313 11.5 62 5 4 !NA !NA !NA
#> 5 NA NA 14.3 56 5 5 NA NA !NA
#> 6 28 NA 14.9 66 5 6 !NA NA !NA
#> 7 23 299 8.6 65 5 7 !NA !NA !NA
#> 8 19 99 13.8 59 5 8 !NA !NA !NA
#> 9 8 19 20.1 61 5 9 !NA !NA !NA
#> 10 NA 194 8.6 69 5 10 NA !NA !NA
#> # ... with 143 more rows, and 3 more variables: Temp_NA <fct>,
#> # Month_NA <fct>, Day_NA <fct>
nabular(airquality)
#> # A tibble: 153 x 12
#> Ozone Solar.R Wind Temp Month Day Ozone_NA Solar.R_NA Wind_NA
#> <int> <int> <dbl> <int> <int> <int> <fct> <fct> <fct>
#> 1 41 190 7.4 67 5 1 !NA !NA !NA
#> 2 36 118 8 72 5 2 !NA !NA !NA
#> 3 12 149 12.6 74 5 3 !NA !NA !NA
#> 4 18 313 11.5 62 5 4 !NA !NA !NA
#> 5 NA NA 14.3 56 5 5 NA NA !NA
#> 6 28 NA 14.9 66 5 6 !NA NA !NA
#> 7 23 299 8.6 65 5 7 !NA !NA !NA
#> 8 19 99 13.8 59 5 8 !NA !NA !NA
#> 9 8 19 20.1 61 5 9 !NA !NA !NA
#> 10 NA 194 8.6 69 5 10 NA !NA !NA
#> # ... with 143 more rows, and 3 more variables: Temp_NA <fct>,
#> # Month_NA <fct>, Day_NA <fct>
```

Using the nabular format helps you manage where missing values are in
your dataset and makes it easy to do visualisations where you split by
missingness:

Expand Down
Binary file modified man/figures/README-facet-by-month-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-geom-miss-point-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-gg-miss-span-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-gg-miss-upset-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-gg-miss-var-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-regular-geom-point-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-shadow-impute-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified man/figures/README-shadow-w-ggplot-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 13 additions & 0 deletions man/is_shade.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/is_shadow.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions man/nabular.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test-bind-shadow.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ test_that("bind_shadow returns a data.frame",{

test_that("bind_shadow returns a tibble",{
expect_equal(class(bind_shadow(airquality)),
c("shadow", "tbl_df", "tbl", "data.frame"))
c("nabular", "tbl_df", "tbl", "data.frame"))
})

test_that("bind_shadow errors when given non dataframe or 0 entry",{
Expand Down
Loading

0 comments on commit 885de6f

Please sign in to comment.