diff --git a/DESCRIPTION b/DESCRIPTION index bc9912e..e2355aa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: metrica Title: Prediction Performance Metrics -Version: 2.0.5 +Version: 2.0.4 Date: 2024-03-23 Authors@R: c( person("Adrian A.", "Correndo", email = "correndo@ksu.edu", role = c("cre", "cph"), comment = c(ORCID = "0000-0002-4172-289X")), diff --git a/NEWS.md b/NEWS.md index 6115836..ca860f3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,27 +1,22 @@ -# metrica 2.0.5 +# metrica 2.0.4 This version: -1) adds a new classification metric called P4 (https://en.wikipedia.org/wiki/P4-metric). https://github.com/adriancorrendo/metrica/issues/36 +1) fixes an issue found for Debian for Importing APSIM NewGeneration files that violated CRAN Policy's. + +2) adds a new classification metric called P4, following Sitarz, M. (2023) doi:10.54364/AAIML.2023.1161. +https://github.com/adriancorrendo/metrica/issues/36 Special thanks to Davide Chicco for suggesting this metric. -2) fixes an error in the invF05 formula from the adjusted F-score metric (agf): +3) fixes an error in the invF05 formula from the adjusted F-score metric (agf): - InvF0.5=(5/4) * ((npv * spec)/( (0.5^2*npv) +spec )) https://github.com/adriancorrendo/metrica/issues/37 Special thanks to Gilles Koumou for pointing out the error. -3) adds a new plot argument, shape_fill, indicating the shape fill for the data points on the 'scatter_plot' and 'bland_altman_plot' functions. +4) adds a new plot argument, shape_fill, indicating the shape fill for the data points on the 'scatter_plot' and 'bland_altman_plot' functions. -###################################################################################################### +##################################################################################################### - PREVIOUS VERSIONS -# metrica 2.0.4 -This version fixes an issue found for Debian for Importing APSIM NewGeneration files that violated CRAN Policy's. 
- -Change details: - -1) Adding the P4-metric to the classification metrics group following Sitarz, M. (2023) doi:10.54364/AAIML.2023.1161 - - # metrica 2.0.3 This version fixes an issue found for Debian for Importing APSIM NewGeneration files that violated CRAN Policy's. diff --git a/README.Rmd b/README.Rmd index 4e96da4..deead23 100644 --- a/README.Rmd +++ b/README.Rmd @@ -194,7 +194,7 @@ example.data <- barley %>% # or 'wheat', 'sorghum', or 'chickpea' ### 3.1.1. Plot functions ### 3.1.1.1. Create a customizable scatter plot with PO orientation -```{r warning=FALSE, message=FALSE, fig.height=4, fig.width=5, dpi=300} +```{r warning=FALSE, message=FALSE, fig.height=4, fig.width=5, dpi=90} barley.scat.plot <- metrica::scatter_plot(data = example.data, @@ -223,7 +223,7 @@ barley.scat.plot ``` ### 3.1.1.2. Create tiles plot with OP orientation -```{r warning=FALSE, message=FALSE, fig.height=4, fig.width=5, dpi=300} +```{r warning=FALSE, message=FALSE, fig.height=4, fig.width=5, dpi=90} barley.tiles.plot <- tiles_plot(data = example.data, obs = measured, @@ -236,7 +236,7 @@ barley.tiles.plot ``` ### 3.1.1.3. Create a density plot with OP orientation -```{r warning=FALSE, message=FALSE, fig.height=4, fig.width=5, dpi=300} +```{r warning=FALSE, message=FALSE, fig.height=4, fig.width=5, dpi=90} barley.density.plot <- metrica::density_plot(data = example.data, obs = measured, pred = simulated, @@ -249,7 +249,7 @@ barley.density.plot ``` ### 3.1.1.4. Create a Bland-Altman plot -```{r warning=FALSE, message=FALSE, fig.height=4, fig.width=5, dpi=300} +```{r warning=FALSE, message=FALSE, fig.height=4, fig.width=5, dpi=90} barley.ba.plot <- metrica::bland_altman_plot(data = example.data, obs = measured, pred = simulated) @@ -335,7 +335,7 @@ head(non_nested_summary) ``` ### 3.1.4. 
Print metrics in a plot -```{r warning=F, message=F, fig.height=5, fig.width=7, dpi=300} +```{r warning=F, message=F, fig.height=5, fig.width=7, dpi=90} df <- metrica::wheat # Create list of selected metrics @@ -375,7 +375,7 @@ multinomial_case <- data.frame(labels = sample(c("Red","Green", "Blue"), 100, re ### 3.1.1. Confusion Matrix
### 3.1.1.1. Binary -```{r warning=FALSE, message=FALSE, fig.height=6, fig.width=7, dpi=300} +```{r warning=FALSE, message=FALSE, fig.height=6, fig.width=7, dpi=90} # a. Print binomial_case %>% confusion_matrix(obs = labels, pred = predictions, plot = FALSE, colors = c(low="#f9dbbd" , high="#735d78"), @@ -388,7 +388,7 @@ binomial_case %>% confusion_matrix(obs = labels, pred = predictions, ``` ### 3.1.1.2. Multiclass -```{r warning=FALSE, message=FALSE, fig.height=6, fig.width=7, dpi=300} +```{r warning=FALSE, message=FALSE, fig.height=6, fig.width=7, dpi=90} # a. Print multinomial_case %>% confusion_matrix(obs = labels, pred = predictions, diff --git a/man/figures/README-unnamed-chunk-12-1.png b/man/figures/README-unnamed-chunk-12-1.png index 2780cdc..c2d6969 100644 Binary files a/man/figures/README-unnamed-chunk-12-1.png and b/man/figures/README-unnamed-chunk-12-1.png differ diff --git a/man/figures/README-unnamed-chunk-14-1.png b/man/figures/README-unnamed-chunk-14-1.png index 96e4555..6455b67 100644 Binary files a/man/figures/README-unnamed-chunk-14-1.png and b/man/figures/README-unnamed-chunk-14-1.png differ diff --git a/man/figures/README-unnamed-chunk-15-1.png b/man/figures/README-unnamed-chunk-15-1.png index 1a702db..dd9ca51 100644 Binary files a/man/figures/README-unnamed-chunk-15-1.png and b/man/figures/README-unnamed-chunk-15-1.png differ diff --git a/man/figures/README-unnamed-chunk-4-1.png b/man/figures/README-unnamed-chunk-4-1.png index cf3bce2..3dba6bf 100644 Binary files a/man/figures/README-unnamed-chunk-4-1.png and b/man/figures/README-unnamed-chunk-4-1.png differ diff --git a/man/figures/README-unnamed-chunk-5-1.png b/man/figures/README-unnamed-chunk-5-1.png index f5be527..603c9e8 100644 Binary files a/man/figures/README-unnamed-chunk-5-1.png and b/man/figures/README-unnamed-chunk-5-1.png differ diff --git a/man/figures/README-unnamed-chunk-6-1.png b/man/figures/README-unnamed-chunk-6-1.png index 3eeb344..b00d5cb 100644 Binary files 
a/man/figures/README-unnamed-chunk-6-1.png and b/man/figures/README-unnamed-chunk-6-1.png differ diff --git a/man/figures/README-unnamed-chunk-7-1.png b/man/figures/README-unnamed-chunk-7-1.png index 7eb1f79..f7a82be 100644 Binary files a/man/figures/README-unnamed-chunk-7-1.png and b/man/figures/README-unnamed-chunk-7-1.png differ diff --git a/man/figures/cheatsheet.png b/man/figures/cheatsheet.png index 881a3ac..828331b 100644 Binary files a/man/figures/cheatsheet.png and b/man/figures/cheatsheet.png differ diff --git a/man/figures/land_cover_example.png b/man/figures/land_cover_example.png index 034bda7..80680ae 100644 Binary files a/man/figures/land_cover_example.png and b/man/figures/land_cover_example.png differ diff --git a/man/figures/maize_phenology_example.png b/man/figures/maize_phenology_example.png index 72b0c54..ea435cc 100644 Binary files a/man/figures/maize_phenology_example.png and b/man/figures/maize_phenology_example.png differ diff --git a/man/figures/shinyapp.png b/man/figures/shinyapp.png index 4f6282c..f059625 100644 Binary files a/man/figures/shinyapp.png and b/man/figures/shinyapp.png differ diff --git a/vignettes/regression_case.Rmd b/vignettes/regression_case.Rmd index a2d5aa9..c02ead2 100644 --- a/vignettes/regression_case.Rmd +++ b/vignettes/regression_case.Rmd @@ -1,278 +1,278 @@ ---- -title: "Regression case: Assessing model agreement in wheat grain nitrogen content prediction" -author: "Leo Bastos & Adrian Correndo" -date: "`r Sys.Date()`" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Regression case} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r setup, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -## 1. Introduction -
-The *`metrica`* package was developed to visualize and compute the level of agreement between observed ground-truth values and model-derived (e.g., mechanistic or empirical) predictions. - -This package is intended to fit into the following workflow: - -1. a data set containing the observed values is used to train a model -2. the trained model is used to generate predictions -3. a data frame containing at least the **observed** and model-**predicted** values is created -4. *`metrica`* package is used to compute goodness of fit and error metrics based on observed and predicted values -5. *`metrica`* package is used to visualize model fit and selected fit metrics - -This vignette introduces the functionality of the *`metrica`* package applied to observed and model-predicted values of wheat grain nitrogen (N) content (in grams of N $m^{-2}$). - -## 2. Wheat grain N content -Let's begin by loading the packages needed. -```{r libraries, message=F, warning=F} -library(ggplot2) -library(dplyr) -library(metrica) -``` - -Now we load the `wheat` data set included in the `metrica` package. -```{r load data} -# Load -data(wheat) - -# Printing first observations -head(wheat) -``` - -This data set contains two columns: - -- **pred**: model-predicted wheat grain N content, in g N $m^{-2}$, -- **obs**: ground-truth observed wheat grain N content, in g N $m^{-2}$ - -## 3. Visual assessment of agreement -### 3.1 Scatterplot of pred vs. obs -The simplest way to visually assess agreement between observed and predicted values is with a scatterplot. - -We can use the function `scatter_plot()` from the *metrica* package to create a scatterplot. 
- -The function requires specifying at least: - -- the data frame object name (`data` argument) -- the name of the column containing observed values (`obs` argument) -- the name of the column containing predicted values (`pred` argument) - -Besides a scatterplot, this function also adds to the plot the **1:1 line** (solid line) and the **linear regression line** (dashed line). - -```{r scatter_plot PO, fig.width=5, fig.height=4, dpi=200} -scatter_plot(data = wheat, - obs = obs, - pred = pred) -``` - -The default behavior of `scatter_plot()` places the `obs` column on the x axis and the `pred` column on the y axis (`orientation = "PO"`). This can be inverted by changing the argument `orientation` to "OP": -```{r scatter_plot OP, fig.width=5, fig.height=4, dpi=200} -scatter_plot(data = wheat, - obs = obs, - pred = pred, - orientation = "OP") -``` - - -The output of the `scatter_plot()` function is a `ggplot2` object that can be further customized: -```{r scatter_plot custom, fig.width=5, fig.height=4, dpi=200} -scatter_plot(data = wheat, - obs = obs, - pred = pred, - orientation = "OP", - regline_color = "#d0f4de", - shape_color = "#80ed99", - eq_color = "white", - )+ - labs(x ="Predicted wheat N content (g N/m2)", - y = "Observed wheat N content (g N/m2)")+ - theme_dark() -``` - -## 3.2 Bland-Altman plot -The Bland-Altman plot is another way of visually assessing observed vs. predicted agreement. It plots the difference between observed and predicted values on the y axis, and the observed values on the x axis: - -```{r bland-altman, fig.width=5, fig.height=4, dpi=200} -bland_altman_plot(data = wheat, - obs = obs, - pred = pred) -``` - -## 4. Numerical assessment of agreement -The *metrica* package contains functions for **41 metrics** to assess agreement between observed and predicted values for continuous data (i.e., regression error). - -A list with all the the metrics including their name, definition, details, formula, and function name, please check [here]. 
- -All of the metric functions take the same three arguments as the plotting functions: - -- the data frame object name (`data` argument) -- the name of the column containing observed values (`obs` argument) -- the name of the column containing predicted values (`pred` argument) - -The user can choose to calculate a single metric, or to calculate all metrics at once. - -To calculate a single metric, the metric function can be called. -For example, to calculate $R^{2}$, we can use the `R2()` function: -```{r r2} -R2(data = wheat, - obs = obs, - pred = pred, tidy = TRUE) - -``` - -Similarly, to calculate root mean squared error, we can use the `RMSE()` function: -```{r rmse} -RMSE(data = wheat, - obs = obs, - pred = pred) -``` - -The user can also calculate all 41 metrics at once using the function `metrics_summary()`: -```{r metrics summary} -metrics_summary(data = wheat, - obs = obs, - pred = pred, - type = "regression") - -``` - -If the user wants just specific metrics, within the same function `metrics_summary()`, user can pass a list of desired metrics using the argument "metrics_list" as follows: -```{r metrics summary list} - -my.metrics <- c("R2","MBE", "RMSE", "RSR", "NSE", "KGE", "CCC") - -metrics_summary(data = wheat, - obs = obs, - pred = pred, - type = "regression", - metrics_list = my.metrics) - -``` -## 5. Time series
- -### 5.1. Example of timeseries prediction - -In some cases, we may count with time-series predictions (e.g. cumulative values from daily simulations). For example, let's say that we evaluate the production of drymass during the season. For this specific case, the Mean Absolute Scaled Error is a more solid metric compared to conventional RMSE or similar metrics.
- -Let's suppose that we have predictions of wheat grain N over the years on the same location -for a series of 20 years from 2001 to 2020. Thus, we may get a random sample from the wheat data set and assume they represent the time series of interest. Therefore, we create a new `time` variable called `Year` that will serve to sort the observations. - -```{r metrics time-series, fig.width=6, fig.height=5, dpi=200} -set.seed(165) - -wheat_time <- metrica::wheat %>% sample_n(., size = 20) %>% - mutate(Year = seq(2001,2020, by =1)) - -# Plot -wheat_time %>% ggplot2::ggplot(aes(x = Year))+ - geom_point(aes(y = pred, fill = "Predicted", shape = "Predicted"))+ - geom_point(aes(y = obs, fill = "Observed", shape = "Observed"))+ - geom_line(aes(y = pred, col = "Predicted", linetype = "Predicted"), size = .75)+ - geom_line(aes(y = obs, col = "Observed", linetype = "Observed"), size = .75)+ - scale_fill_manual(name = "", values = c("dark red","steelblue"))+ - scale_shape_manual(name = "", values = c(21,24))+ - scale_color_manual(name = "", values = c("dark red","steelblue"))+ - scale_linetype_manual(name = "", values = c(1,2))+ - labs(x = "Year", y = "Wheat Grain N (g/m2)")+ - theme_bw()+ - theme(legend.position = "top") -``` - -### 5.2. Use MASE for timeseries
- -In the case of timeseries analysis, the Mean Absolute Scaled Error (MASE, Hyndman & Koehler, 2006), -a scaled error metric, is preferable over other classic metrics such as the RMSE. With `metrica`, -we can use the function MASE. Please, be aware that MASE requires the `obs` and `pred` data along with a third column corresponding to the temporal variable that sorts the data (use the `time` argument to specify it). The default method to scale the MASE is the `naive` forecast (random-walk), which -requires the user to define the size of the `naive_step`. Otherwise, an out-of-bag MAE can be specified with the `oob_mae` argument.
- -```{r MASE} - -# MASE estimate, with naive approach (random-walk, i.e. using observation of t-1 as prediction) -metrica::MASE(data = wheat_time, obs = obs, pred = pred, - naive_step = 1, tidy = FALSE, time = "Year") - -metrica::MASE(data = wheat_time, obs = obs, pred = pred, - naive_step = 1, tidy = FALSE) - -# MASE estimate, with mae coming from an independent training set. -metrica::MASE(data = wheat_time, obs = obs, pred = pred, - naive_step = 1, tidy = FALSE, time = "Year", oob_mae = 6) - - -``` - - - -## 6. Visual and numerical assessment combined -The user can also create a scatter plot that includes not only the **predicted** vs. **observed** points, **1:1 line**, and **regression line**, but also **selected metrics and their values** plus the **SMA regression equation**. - -This is accomplished with the function `scatter_plot()`: - -```{r scatter_plot, fig.width=6, fig.height=5, dpi=200} -scatter_plot(data = wheat, - obs = obs, - pred = pred) - -``` - -To print the metrics on the `scatter_plot()`, just use print.metrics. Warning: do not forget to specify your 'metrics.list': - -```{r scatter_plot print_metrics, fig.width=6, fig.height=5, dpi=200} - -my.metrica.plot <- scatter_plot(data = wheat, - obs = obs, - pred = pred, - print_metrics = TRUE, metrics_list = my.metrics) - -my.metrica.plot - -``` -Also, as a ggplot element, outputs are flexible of further edition: - -```{r scatter_plot.edit, fig.width=6, fig.height=5, dpi=200} - -my.metrica.plot + - # Modify labels - labs(x = "Observed (days to emergence)", y = "Predicted (days to emergence)")+ - # Modify theme - theme_light() - -my.metrica.plot + - # Modify labels - labs(x = "Observed (Mg/ha)", y = "Predicted (Mg/ha)")+ - # Modify theme - theme_dark() -``` - - -## 7. 
Exporting -To export the metrics summary table, the user can simply write it to file with the function `write.csv()`: - -```{r export metrics_summary, eval=F } -metrics_summary(data = wheat, - obs = obs, - pred = pred, - type = "regression") %>% - write.csv("metrics_summary.csv") - -``` - - -Similarly, to export a plot, the user can simply write it to file with the function `ggsave()`: - -```{r export plot, eval=F} - -ggsave(plot = my.metrica.plot, - "scatter_metrics.png", - width = 5, - height = 5) -``` - - +--- +title: "Regression case: Assessing model agreement in wheat grain nitrogen content prediction" +author: "Leo Bastos & Adrian Correndo" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Regression case} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +## 1. Introduction +
+The *`metrica`* package was developed to visualize and compute the level of agreement between observed ground-truth values and model-derived (e.g., mechanistic or empirical) predictions. + +This package is intended to fit into the following workflow: + +1. a data set containing the observed values is used to train a model +2. the trained model is used to generate predictions +3. a data frame containing at least the **observed** and model-**predicted** values is created +4. *`metrica`* package is used to compute goodness of fit and error metrics based on observed and predicted values +5. *`metrica`* package is used to visualize model fit and selected fit metrics + +This vignette introduces the functionality of the *`metrica`* package applied to observed and model-predicted values of wheat grain nitrogen (N) content (in grams of N $m^{-2}$). + +## 2. Wheat grain N content +Let's begin by loading the packages needed. +```{r libraries, message=F, warning=F} +library(ggplot2) +library(dplyr) +library(metrica) +``` + +Now we load the `wheat` data set included in the `metrica` package. +```{r load data} +# Load +data(wheat) + +# Printing first observations +head(wheat) +``` + +This data set contains two columns: + +- **pred**: model-predicted wheat grain N content, in g N $m^{-2}$, +- **obs**: ground-truth observed wheat grain N content, in g N $m^{-2}$ + +## 3. Visual assessment of agreement +### 3.1 Scatterplot of pred vs. obs +The simplest way to visually assess agreement between observed and predicted values is with a scatterplot. + +We can use the function `scatter_plot()` from the *metrica* package to create a scatterplot. 
+ +The function requires specifying at least: + +- the data frame object name (`data` argument) +- the name of the column containing observed values (`obs` argument) +- the name of the column containing predicted values (`pred` argument) + +Besides a scatterplot, this function also adds to the plot the **1:1 line** (solid line) and the **linear regression line** (dashed line). + +```{r scatter_plot PO, fig.width=5, fig.height=4, dpi=90, warning=FALSE, message=FALSE} +scatter_plot(data = wheat, + obs = obs, + pred = pred) +``` + +The default behavior of `scatter_plot()` places the `obs` column on the x axis and the `pred` column on the y axis (`orientation = "PO"`). This can be inverted by changing the argument `orientation` to "OP": +```{r scatter_plot OP, fig.width=5, fig.height=4, dpi=90, warning=FALSE, message=FALSE} +scatter_plot(data = wheat, + obs = obs, + pred = pred, + orientation = "OP") +``` + + +The output of the `scatter_plot()` function is a `ggplot2` object that can be further customized: +```{r scatter_plot custom, fig.width=5, fig.height=4, dpi=90, warning=FALSE, message=FALSE} +scatter_plot(data = wheat, + obs = obs, + pred = pred, + orientation = "OP", + regline_color = "#d0f4de", + shape_color = "#80ed99", + eq_color = "white", + )+ + labs(x ="Predicted wheat N content (g N/m2)", + y = "Observed wheat N content (g N/m2)")+ + theme_dark() +``` + +## 3.2 Bland-Altman plot +The Bland-Altman plot is another way of visually assessing observed vs. predicted agreement. It plots the difference between observed and predicted values on the y axis, and the observed values on the x axis: + +```{r bland-altman, fig.width=5, fig.height=4, dpi=90, warning=FALSE, message=FALSE} +bland_altman_plot(data = wheat, + obs = obs, + pred = pred) +``` + +## 4. Numerical assessment of agreement +The *metrica* package contains functions for **41 metrics** to assess agreement between observed and predicted values for continuous data (i.e., regression error). 
+ +For a list of all the metrics, including their name, definition, details, formula, and function name, please check [here]. + +All of the metric functions take the same three arguments as the plotting functions: + +- the data frame object name (`data` argument) +- the name of the column containing observed values (`obs` argument) +- the name of the column containing predicted values (`pred` argument) + +The user can choose to calculate a single metric, or to calculate all metrics at once. + +To calculate a single metric, the metric function can be called. +For example, to calculate $R^{2}$, we can use the `R2()` function: +```{r r2} +R2(data = wheat, + obs = obs, + pred = pred, tidy = TRUE) + +``` + +Similarly, to calculate root mean squared error, we can use the `RMSE()` function: +```{r rmse} +RMSE(data = wheat, + obs = obs, + pred = pred) +``` + +The user can also calculate all 41 metrics at once using the function `metrics_summary()`: +```{r metrics summary} +metrics_summary(data = wheat, + obs = obs, + pred = pred, + type = "regression") + +``` + +If the user wants just specific metrics, within the same function `metrics_summary()`, the user can pass a list of desired metrics using the argument "metrics_list" as follows: +```{r metrics summary list} + +my.metrics <- c("R2","MBE", "RMSE", "RSR", "NSE", "KGE", "CCC") + +metrics_summary(data = wheat, + obs = obs, + pred = pred, + type = "regression", + metrics_list = my.metrics) + +``` +## 5. Time series
+ +### 5.1. Example of timeseries prediction + +In some cases, we may have time-series predictions (e.g. cumulative values from daily simulations). For example, let's say that we evaluate the production of dry mass during the season. For this specific case, the Mean Absolute Scaled Error is a more robust metric than the conventional RMSE or similar metrics.
+ +Let's suppose that we have predictions of wheat grain N over the years on the same location +for a series of 20 years from 2001 to 2020. Thus, we may get a random sample from the wheat data set and assume they represent the time series of interest. Therefore, we create a new `time` variable called `Year` that will serve to sort the observations. + +```{r metrics time-series, fig.width=6, fig.height=5, dpi=90} +set.seed(165) + +wheat_time <- metrica::wheat %>% sample_n(., size = 20) %>% + mutate(Year = seq(2001,2020, by =1)) + +# Plot +wheat_time %>% ggplot2::ggplot(aes(x = Year))+ + geom_point(aes(y = pred, fill = "Predicted", shape = "Predicted"))+ + geom_point(aes(y = obs, fill = "Observed", shape = "Observed"))+ + geom_line(aes(y = pred, col = "Predicted", linetype = "Predicted"), size = .75)+ + geom_line(aes(y = obs, col = "Observed", linetype = "Observed"), size = .75)+ + scale_fill_manual(name = "", values = c("dark red","steelblue"))+ + scale_shape_manual(name = "", values = c(21,24))+ + scale_color_manual(name = "", values = c("dark red","steelblue"))+ + scale_linetype_manual(name = "", values = c(1,2))+ + labs(x = "Year", y = "Wheat Grain N (g/m2)")+ + theme_bw()+ + theme(legend.position = "top") +``` + +### 5.2. Use MASE for timeseries
+ +In the case of time-series analysis, the Mean Absolute Scaled Error (MASE, Hyndman & Koehler, 2006), +a scaled error metric, is preferable to other classic metrics such as the RMSE. With `metrica`, +we can use the function `MASE()`. Please be aware that MASE requires the `obs` and `pred` data along with a third column corresponding to the temporal variable that sorts the data (use the `time` argument to specify it). The default method to scale the MASE is the `naive` forecast (random-walk), which +requires the user to define the size of the `naive_step`. Otherwise, an out-of-bag MAE can be specified with the `oob_mae` argument.
+ +```{r MASE} + +# MASE estimate, with naive approach (random-walk, i.e. using observation of t-1 as prediction) +metrica::MASE(data = wheat_time, obs = obs, pred = pred, + naive_step = 1, tidy = FALSE, time = "Year") + +metrica::MASE(data = wheat_time, obs = obs, pred = pred, + naive_step = 1, tidy = FALSE) + +# MASE estimate, with mae coming from an independent training set. +metrica::MASE(data = wheat_time, obs = obs, pred = pred, + naive_step = 1, tidy = FALSE, time = "Year", oob_mae = 6) + + +``` + + + +## 6. Visual and numerical assessment combined +The user can also create a scatter plot that includes not only the **predicted** vs. **observed** points, **1:1 line**, and **regression line**, but also **selected metrics and their values** plus the **SMA regression equation**. + +This is accomplished with the function `scatter_plot()`: + +```{r scatter_plot, fig.width=6, fig.height=5, dpi=90} +scatter_plot(data = wheat, + obs = obs, + pred = pred) + +``` + +To print the metrics on the `scatter_plot()`, just set `print_metrics = TRUE`. Warning: do not forget to specify your `metrics_list`: + +```{r scatter_plot print_metrics, fig.width=6, fig.height=5, dpi=90} + +my.metrica.plot <- scatter_plot(data = wheat, + obs = obs, + pred = pred, + print_metrics = TRUE, metrics_list = my.metrics) + +my.metrica.plot + +``` +Also, as a ggplot element, the output can be further edited: + +```{r scatter_plot.edit, fig.width=6, fig.height=5, dpi=90} + +my.metrica.plot + + # Modify labels + labs(x = "Observed (days to emergence)", y = "Predicted (days to emergence)")+ + # Modify theme + theme_light() + +my.metrica.plot + + # Modify labels + labs(x = "Observed (Mg/ha)", y = "Predicted (Mg/ha)")+ + # Modify theme + theme_dark() +``` + + +## 7.
Exporting +To export the metrics summary table, the user can simply write it to file with the function `write.csv()`: + +```{r export metrics_summary, eval=F } +metrics_summary(data = wheat, + obs = obs, + pred = pred, + type = "regression") %>% + write.csv("metrics_summary.csv") + +``` + + +Similarly, to export a plot, the user can simply write it to file with the function `ggsave()`: + +```{r export plot, eval=F} + +ggsave(plot = my.metrica.plot, + "scatter_metrics.png", + width = 5, + height = 5) +``` + +