From c3b06cda3141ac2c2e433e01b3faef248bc33893 Mon Sep 17 00:00:00 2001 From: Thomas Wiecki Date: Thu, 3 Jan 2019 14:14:51 +0100 Subject: [PATCH] ENH: add ability to get daily multi-period forward returns rather than only cumulative --- alphalens/tests/test_utils.py | 17 +++++++++++++++++ alphalens/utils.py | 24 ++++++++++++++++++++---- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/alphalens/tests/test_utils.py b/alphalens/tests/test_utils.py index b0611975..8c9537f7 100644 --- a/alphalens/tests/test_utils.py +++ b/alphalens/tests/test_utils.py @@ -81,6 +81,23 @@ def test_compute_forward_returns(self): assert_frame_equal(fp, expected) + def test_compute_forward_returns_non_cum(self): + dr = date_range(start='2015-1-1', end='2015-1-3') + prices = DataFrame(index=dr, columns=['A', 'B'], + data=[[1, 1], [1, 2], [2, 1]]) + factor = prices.stack() + + fp = compute_forward_returns(factor, prices, periods=[1, 2], + cumulative_returns=False) + + ix = MultiIndex.from_product([dr, ['A', 'B']], + names=['date', 'asset']) + expected = DataFrame(index=ix, columns=['1D', '2D']) + expected['1D'] = [0., 1., 1., -0.5, nan, nan] + expected['2D'] = [1., -0.5, nan, nan, nan, nan] + + assert_frame_equal(fp, expected) + @parameterized.expand([(factor_data, 4, None, False, False, [1, 2, 3, 4, 4, 3, 2, 1]), (factor_data, 2, None, False, False, diff --git a/alphalens/utils.py b/alphalens/utils.py index da46522d..ee0f2b54 100644 --- a/alphalens/utils.py +++ b/alphalens/utils.py @@ -215,7 +215,8 @@ def infer_trading_calendar(factor_idx, prices_idx): def compute_forward_returns(factor, prices, periods=(1, 5, 10), - filter_zscore=None): + filter_zscore=None, + cumulative_returns=True): """ Finds the N period forward returns (as percent change) for each asset provided. @@ -240,6 +241,10 @@ def compute_forward_returns(factor, Sets forward returns greater than X standard deviations from the the mean to nan. Set it to 'None' to avoid filtering. 
Caution: this outlier filtering incorporates lookahead bias. + cumulative_returns : bool, optional + If True, forward returns columns will contain cumulative returns. + If False, each column holds the one-period return ending N periods + ahead, to analyze how predictive a factor is for a single forward day. Returns ------- @@ -278,8 +283,13 @@ def compute_forward_returns(factor, column_list = [] for period in sorted(periods): + if cumulative_returns: + returns = prices.pct_change(period) + else: + returns = prices.pct_change() + forward_returns = \ - prices.pct_change(period).shift(-period).reindex(factor_dateindex) + returns.shift(-period).reindex(factor_dateindex) if filter_zscore is not None: mask = abs( @@ -631,7 +641,8 @@ def get_clean_factor_and_forward_returns(factor, filter_zscore=20, groupby_labels=None, max_loss=0.35, - zero_aware=False): + zero_aware=False, + cumulative_returns=True): """ Formats the factor data, pricing data, and group mappings into a DataFrame that contains aligned MultiIndex indices of timestamp and asset. The @@ -741,6 +752,10 @@ def get_clean_factor_and_forward_returns(factor, If True, compute quantile buckets separately for positive and negative signal values. This is useful if your signal is centered and zero is the separation between long and short signals, respectively. + cumulative_returns : bool, optional + If True, forward returns columns will contain cumulative returns. + If False, each column holds the one-period return ending N periods + ahead, to analyze how predictive a factor is for a single forward day. Returns ------- @@ -774,7 +789,8 @@ def get_clean_factor_and_forward_returns(factor, """ forward_returns = compute_forward_returns(factor, prices, periods, - filter_zscore) + filter_zscore, + cumulative_returns) factor_data = get_clean_factor(factor, forward_returns, groupby=groupby, groupby_labels=groupby_labels,