Skip to content

Commit

Permalink
add plots and start to split log files
Browse files (browse the repository at this point in the history)
  • Loading branch information
annacprice committed Sep 6, 2021
1 parent 6fa074f commit e1acb34
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 15 deletions.
86 changes: 72 additions & 14 deletions covate/build_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,19 @@ def buildmodel(timeseries, lineagelist, regionlist, enddate, output, validate):

for lineage in lineagelist:

# set log files and plt
path = os.path.join(output, str(getenddate(enddate)), lineage, 'logs')
filename = path + '/' + lineage + '_log.txt'
errorlog = os.path.join(output, str(getenddate(enddate)), 'error_log.txt')
# set plt
plt.rc('axes', titlesize=10)

# set log files
if validate:
path = os.path.join(output, str(getenddate(enddate)), lineage, 'logs/validation')
filename = path + '/' + lineage + '_log.txt'
errorlog = os.path.join(output, str(getenddate(enddate)), 'error_log_validation.txt')
else:
path = os.path.join(output, str(getenddate(enddate)), lineage, 'logs/prediction')
filename = path + '/' + lineage + '_log.txt'
errorlog = os.path.join(output, str(getenddate(enddate)), 'error_log_prediction.txt')

# filter timeseries by lineage
lineagestr = str(lineage) + '_'

Expand All @@ -40,7 +47,10 @@ def buildmodel(timeseries, lineagelist, regionlist, enddate, output, validate):
checkdistribution(X_train, lineage, alpha, filename)

# plot the autocorrelation
plotautocorr(X_train, lineage, maxlag, output, enddate)
if validate:
plotautocorr(X_train, lineage, maxlag, output, enddate, 'additional-plots/validation')
else:
plotautocorr(X_train, lineage, maxlag, output, enddate, 'additional-plots/prediction')

# check for granger causality
try:
Expand Down Expand Up @@ -111,9 +121,9 @@ def buildmodel(timeseries, lineagelist, regionlist, enddate, output, validate):
appendline(filename, 'Lineage has no cointegration => Run VAR')

if not validate:
vecautoreg(X_train, lineage, maxlag, regionlist, nsteps, alpha, filename, output, enddate)
vecautoreg(X_train, lineage, maxlag, regionlist, nsteps, alpha, filename, output, errorlog, enddate)
else:
vecautoregvalid(X_train, X_test, lineage, maxlag, regionlist, nsteps, alpha, filename, output, enddate)
vecautoregvalid(X_train, X_test, lineage, maxlag, regionlist, nsteps, alpha, filename, output, errorlog, enddate)

except (np.linalg.LinAlgError) as e:
appendline(filename, 'ERROR: Cannot build model')
Expand All @@ -138,10 +148,10 @@ def checkdistribution(X_train, lineage, alpha, filename):
appendline(filename, 'Skewness = ' + str(round(stat_skew, 4)))


def plotautocorr(X_train, lineage, maxlag, output, enddate):
def plotautocorr(X_train, lineage, maxlag, output, enddate, folder):
"""Plot autocorrelation"""

path = os.path.join(output, str(getenddate(enddate)), lineage, 'additional-plots')
path = os.path.join(output, str(getenddate(enddate)), lineage, folder)

for name, col in X_train.iteritems():
plot_acf(col, lags=maxlag)
Expand Down Expand Up @@ -263,7 +273,7 @@ def vecerrcorr(X_train, lineage, VECMdeterm, lag, coint_count, regionlist, nstep
pred = (pd.DataFrame(forecast.round(0), columns=X_train.columns, index=idx))

# cast negative predictions to zero
pred[pred<0] = 0
#pred[pred<0] = 0

path = os.path.join(output, str(getenddate(enddate)), lineage, 'prediction')

Expand Down Expand Up @@ -292,14 +302,21 @@ def vecerrcorrvalid(X_train, X_test, lineage, VECMdeterm, lag, coint_count, regi
vecm = VECM(endog = X_train, k_ar_diff = lag, coint_rank = coint_count, deterministic = VECMdeterm)

vecm_fit = vecm.fit()

try:
appendline(filename, vecm_fit.summary().as_text())
except IndexError:
appendline(filename, 'WARN: Failed to create VECM summary')
appendline(errorlog, str(lineage) + ' WARN: Failed to create VECM summary')

vecm_fit.predict(steps=nsteps)

forecast, lower, upper = vecm_fit.predict(nsteps, alpha)

pred = (pd.DataFrame(forecast.round(0), index=X_test.index, columns=X_test.columns))

# cast negative predictions to 0
pred[pred<0] = 0
#pred[pred<0] = 0

path = os.path.join(output, str(getenddate(enddate)), lineage, 'validation')

Expand All @@ -318,7 +335,7 @@ def vecerrcorrvalid(X_train, X_test, lineage, VECMdeterm, lag, coint_count, regi
plt.close()


def vecautoreg(X_train, lineage, maxlag, regionlist, nsteps, alpha, filename, output, enddate):
def vecautoreg(X_train, lineage, maxlag, regionlist, nsteps, alpha, filename, output, errorlog, enddate):
""" Build VAR model"""

# check for stationarity and difference
Expand All @@ -340,6 +357,24 @@ def vecautoreg(X_train, lineage, maxlag, regionlist, nsteps, alpha, filename, ou

appendline(filename, 'Series has been first differenced')

# add warn message if series is still not stationary
if False in adf_result:

appendline(filename, 'WARN: Series is not stationary')

appendline(errorlog, str(lineage) + ' WARN: Series is not stationary')

# plot autocorrelation again
plotautocorr(X_train, lineage, maxlag, output, enddate, 'additional-plots/prediction/VAR')

# plot series to check it's stationary
path = os.path.join(output, str(getenddate(enddate)), lineage, 'additional-plots/prediction/VAR')
X_train.plot()
plt.tight_layout()
plt.savefig(path + '/' + lineage + '_stationary_check.png')
plt.clf()
plt.close()

# build var
varm = VAR(endog = X_train)

Expand Down Expand Up @@ -369,6 +404,9 @@ def vecautoreg(X_train, lineage, maxlag, regionlist, nsteps, alpha, filename, ou
fc[str(col)]+=abs(minval)
elif VARdiff == 'none':
fc[str(col)] = fc[str(col)+'_diff']
# shift series by minimum negative value
minval = np.amin(fc[str(col)])
fc[str(col)]+=abs(minval)

# cast negative predictions to 0
#fc[fc<0] = 0
Expand All @@ -389,7 +427,7 @@ def vecautoreg(X_train, lineage, maxlag, regionlist, nsteps, alpha, filename, ou
plt.close()


def vecautoregvalid(X_train, X_test, lineage, maxlag, regionlist, nsteps, alpha, filename, output, enddate):
def vecautoregvalid(X_train, X_test, lineage, maxlag, regionlist, nsteps, alpha, filename, output, errorlog, enddate):
"""Build VAR model for validation"""

# check for stationarity and difference
Expand All @@ -410,6 +448,24 @@ def vecautoregvalid(X_train, X_test, lineage, maxlag, regionlist, nsteps, alpha,

appendline(filename, 'Series has been first differenced')

# add warn message if series is still not stationary
if False in adf_result:

appendline(filename, 'WARN: Series is not stationary')

appendline(errorlog, str(lineage) + ' WARN: Series is not stationary')

# plot autocorrelation again
plotautocorr(X_train, lineage, maxlag, output, enddate, 'additional-plots/validation/VAR')

# plot series to check it's stationary
path = os.path.join(output, str(getenddate(enddate)), lineage, 'additional-plots/validation/VAR')
X_train.plot()
plt.tight_layout()
plt.savefig(path + '/' + lineage + '_stationary_check.png')
plt.clf()
plt.close()

# build var
varm = VAR(endog = X_train)

Expand All @@ -434,7 +490,9 @@ def vecautoregvalid(X_train, X_test, lineage, maxlag, regionlist, nsteps, alpha,
fc[str(col)]+=abs(minval)
elif VARdiff == 'none':
fc[str(col)] = fc[str(col)+'_diff']

# shift series by minimum negative value
minval = np.amin(fc[str(col)])
fc[str(col)]+=abs(minval)

# cast negative predictions to 0
#fc[fc<0] = 0
Expand Down
2 changes: 1 addition & 1 deletion covate/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def createoutputdir(lineage, output, enddate):

out_time = getenddate(enddate)

out_list = ['prediction', 'validation', 'logs', 'additional-plots']
out_list = ['prediction', 'validation', 'logs/prediction', 'logs/validation', 'additional-plots/prediction/VAR', 'additional-plots/validation/VAR']

for elem in out_list:
out_dir = os.path.join(out_time, lineage ,elem)
Expand Down

0 comments on commit e1acb34

Please sign in to comment.