-
Notifications
You must be signed in to change notification settings - Fork 0
/
DataPrep.R
65 lines (42 loc) · 1.59 KB
/
DataPrep.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Data transformation and NA handling
# run config file
source("~/ForecastReturnDistribution/config.R")
# import preparation functions
source(file.path(path$func,"func_DataPrep.R"))
# load packages
library(tidyr)
# read data
dat <- readRDS(paste0(creationDataDate, "rawData.rds"))

##### Transform Data #####
# NOTE(review): only tidyr is attached in this script. The dplyr verbs
# (mutate, across, arrange) and str_subset used below are presumably attached
# by the sourced config / function files -- confirm.

# get column names
name_dat <- names(dat)
# get all adjusted_ ... names
name_adj <- str_subset(name_dat, "adjusted\\_")
# get all volume_ ... names
name_vol <- str_subset(name_dat, "volume\\_")

# transform with diff log x_t (t5 function)
# Put the rows in chronological order first: a difference of consecutive
# observations is only meaningful when the rows follow each other in time.
# The NA handling further down sorts by date only when it finds gaps, which
# is too late for the differencing. For input that is already sorted this
# extra arrange() changes nothing.
dat <- dat %>%
  arrange(date) %>%
  mutate(across(all_of(c(name_adj, name_vol)), t5))
##### NA handling #####
# get information how good is the quality of the time series
tsQuality <- TSnaInfo(dat, "date")

if (max(tsQuality$numNAbtwTSstartend) > 0) {
  # names of the columns with NAs between series start and end
  # (the row names of tsQuality are used as column names of dat below)
  name_NA <- rownames(tsQuality)[tsQuality$numNAbtwTSstartend > 0]

  # console output of quality
  cat(
    "Fill missing values for: ", paste(name_NA, collapse = ", "), "\n",
    "Maximal imputation: ", max(tsQuality$numNAbtwTSstartend), "\n",
    "Maximal number of consecutive NAs: ",
    max(tsQuality$numConsNAbtwTSstartend), "\n",
    sep = ""
  )

  # impute missing values with most recent observation
  # NOTE(review): fill() carries a value forward through every NA that follows
  # it, so in these columns NAs after a series' last observation are filled
  # too, not only the gaps reported above -- confirm this is intended.
  # NOTE(review): the adjusted_/volume_ columns were already transformed with
  # t5 before this point, so it is the transformed value that gets carried
  # forward -- confirm that this is the intended imputation.
  dat <- dat %>%
    arrange(date) %>%
    fill(all_of(name_NA), .direction = "down")

  # check if imputation is complete and, if not, say which columns failed
  tsFillQuality <- TSnaInfo(dat, "date")
  name_unfilled <-
    rownames(tsFillQuality)[tsFillQuality$numNAbtwTSstartend != 0]
  if (length(name_unfilled) > 0) {
    stop(
      "Something went wrong with NA filling: ",
      paste(name_unfilled, collapse = ", "),
      call. = FALSE
    )
  }
}
##### Save Data #####
# Write the processed data to an RDS file whose name uses the same
# creationDataDate prefix as the raw input file.
saveRDS(
  object = dat,
  file = paste0(creationDataDate, "NAFilledData.rds")
)