-
Notifications
You must be signed in to change notification settings - Fork 414
/
data-generator-v2.R
117 lines (99 loc) · 4.71 KB
/
data-generator-v2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
library("tidyverse")
library("data.table")
big_mac_countries <- c(
"ARG", "AUS", "BRA", "GBR", "CAN", "CHL", "CHN", "CZE", "DNK",
"EGY", "HKG", "HUN", "IDN", "ISR", "JPN", "MYS", "MEX", "NZL",
"NOR", "PER", "PHL", "POL", "RUS", "SAU", "SGP", "ZAF", "KOR",
"SWE", "CHE", "TWN", "THA", "TUR", "ARE", "USA", "COL", "CRI",
"PAK", "LKA", "UKR", "URY", "IND", "VNM", "GTM", "HND", "VEN",
"NIC", "AZE", "BHR", "JOR", "KWT", "LBN", "MDA", "OMN",
"QAT", "ROU", "EUZ"
)
big_mac_data <- fread("./source-data/big-mac-source-data-v2.csv", na.strings = "#N/A") %>%
.[!is.na(local_price)] %>% # remove lines where the local price is missing
.[, GDP_local := as.numeric(GDP_local)] %>% # convert GDP to a number
.[order(date, name)] # sort by date and then by country name, for easy reading
latest_date <- big_mac_data$date %>% max()
big_mac_data[, dollar_price := local_price / dollar_ex]
base_currencies <- c("USD", "EUR", "GBP", "JPY", "CNY")
big_mac_index <- big_mac_data[
!is.na(dollar_price) & iso_a3 %in% big_mac_countries,
.(date, iso_a3, currency_code, name, local_price, dollar_ex, dollar_price)
]
for (currency in base_currencies) {
big_mac_index[
, # we don't want a subset, so our first argument is blank
(currency) := # we'll add a new column named for the base set
dollar_price / # we divide the dollar price in each row by
# the dollar price on the *base currency*'s row (.SD is a data.table
.SD[currency_code == currency]$dollar_price - # that contains only the current group)
1, # one means parity (neither over- nor under-valued), so we subtract one
# to get an over/under-valuation value
by = date # and of course, we'll group these rows by date
]
}
big_mac_index[, (base_currencies) := round(.SD, 5L), .SDcols = base_currencies]
to_plot <- big_mac_index[date == latest_date]
to_plot$name <- factor(to_plot$name, levels = to_plot$name[order(to_plot$USD)])
fwrite(big_mac_index, "./output-data/big-mac-raw-index.csv")
big_mac_gdp_data <- big_mac_data[GDP_local > 0]
regression_countries <- c(
"ARG", "AUS", "BRA", "GBR", "CAN", "CHL", "CHN", "CZE", "DNK",
"EGY", "EUZ", "HKG", "HUN", "IDN", "ISR", "JPN", "MYS", "MEX",
"NZL", "NOR", "PER", "PHL", "POL", "RUS", "SAU", "SGP", "ZAF",
"KOR", "SWE", "CHE", "TWN", "THA", "TUR", "USA", "COL", "PAK",
"IND", "AUT", "BEL", "NLD", "FIN", "FRA", "DEU", "IRL", "ITA",
"PRT", "ESP", "GRC", "EST"
)
# in 2021, we added a number of additional countries to the adjusted index
regression_addons_2021 <- c(
"ARE", "CRI", "LKA", "UKR", "URY", "VNM", "GTM", "HND", "NIC",
"AZE", "BHR", "HRV", "JOR", "KWT", "MDA", "OMN", "QAT", "ROU",
"SVK", "SVN", "LVA", "LTU"
)
big_mac_gdp_data <- big_mac_gdp_data[iso_a3 %in% regression_countries |
(iso_a3 %in% regression_addons_2021 & date >= as.Date("2021-01-01"))]
big_mac_gdp_data %>%
.[, GDP_bigmac := GDP_local / (local_price / .SD[iso_a3 == "USA"]$local_price), by = date]
big_mac_gdp_data[
,
`:=`(
adj_price = lm(dollar_price ~ GDP_bigmac) %>% predict()
# adj_price_USD=lm(dollar_price ~ GDP_dollar) %>% predict
),
by = date
]
big_mac_adj_index <- big_mac_gdp_data[
!is.na(dollar_price) &
(
iso_a3 %in% regression_countries |
iso_a3 %in% regression_addons_2021 & date >= "2021-01-01"
) &
iso_a3 %in% big_mac_countries,
.(date, iso_a3, currency_code, name, local_price, dollar_ex, dollar_price, GDP_bigmac, adj_price)
]
for (currency in base_currencies) {
big_mac_adj_index[
, # we don't want a subset, so our first argument is blank
(currency) := # we'll add a new column named for the base set
( # we divide the dollar price by the adjusted price to get
dollar_price / adj_price # the deviation from our expectation by
) /
# the same figure from the *base currency*'s rowa\
(
.SD[currency_code == currency]$dollar_price /
.SD[currency_code == currency]$adj_price
) -
1, # one means parity (neither over- nor under-valued), so we subtract one
# to get an over/under-valuation value
by = date # and of course, we'll group these rows by date
]
}
big_mac_adj_index[, (base_currencies) := round(.SD, 5L), .SDcols = base_currencies]
fwrite(big_mac_adj_index, "./output-data/big-mac-adjusted-index.csv")
big_mac_full_index <- merge(big_mac_index, big_mac_adj_index,
by = c("date", "iso_a3", "currency_code", "name", "local_price", "dollar_ex", "dollar_price"),
suffixes = c("_raw", "_adjusted"),
all.x = TRUE
)
fwrite(big_mac_full_index, "./output-data/big-mac-full-index.csv")