-
Notifications
You must be signed in to change notification settings - Fork 1
/
fourier_fa.py
241 lines (208 loc) · 9.58 KB
/
fourier_fa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# fourier_fa.py - Python implementation of the Fourier Basis
#
# Source paper (please cite!):
# G.D. Konidaris, S. Osentoski, and P.S. Thomas. Value Function
# Approximation in Reinforcement Learning using the Fourier Basis.
# In Proceedings of the Twenty-Fifth AAAI Conference on Artificial
# Intelligence, pages 380-385, August 2011.
#
# Note: this class computes the coefficient matrix at instantiation,
# and stores a matrix of size c \times d (where c is the number of
# basis functions and d is the input state dimensionality) in memory
# for the lifetime of the class instance. That can cause memory
# issues since c grows exponentially with d and the basis order. An
# alternative implementation (not given here) is to recompute the
# coefficients whenever the basis must be evaluated. This only
# ever stores O(c) data in memory, at the cost of more expensive
# evaluation computation.
#
# Author: George Konidaris (gdk@cs.brown.edu)
# Creation Date: February 22nd 2021
# Copyright 2021 Brown University
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
import numpy as np
import sys
import itertools
from timeit import default_timer as timer
class FourierBasis:
"""
A class implementing the Fourier Basis.
Methods
-------
length()
returns the number of basis functions
get_coefficients()
returns the Fourier Basis coefficients
get_coefficient(function_no)
returns the coefficients of a specific Fourier Basis function
get_gradient_factors()
return the gradient scaling factors
get_gradient_factor(function_no)
return a specific gradient scaling factor
evaluate(state_vector)
evaluate the basis functions at a given state
Attributes (should not typically be accessed directly)
----------
d : integer
the dimensionality of the input state
order : numpy array of d elements
the Fourier basis order for each state dimension
independent: boolean
indicates whether state variables are considered separately
(True) or cross-variable dependencies are included (False)
coefficients : 2D numpy array
Fourier basis coefficients; k x d array, where k is the
number of basis functions and d the state dimensionality
gradient_factors: 2D numpy array
Fourier basis gradient scaling; same dimensionality as
coefficients
"""
def __init__(self, order, d = None, independent=False):
"""
Constructs a Fourier Basis instantiation.
Note that this implementation constructs a Numpy array of all
the Fourier coefficients, which supports efficient basis
computation but is memory-heavy - O(bd), where b is the number
of basis functions and d is the state dimensionality. An
alternative implementation recomputes each coefficients vector
at evaluation time, and is only O(d) memory.
Parameters
----------
order : int or list of d ints
either the maximum coefficient attached to any state
variable, or a list of such maximial values for each state
variable. If the latter, d can be omitted.
d : int, optional
the dimensionality of the state space; if None, obtained by
len(order) (order must in that case be a list).
independent: boolean, optional
whether or not the dimensions are tiled independently;
False by default. If True, the size of basis is not
exponential in d and order.
"""
# Instance variables
self.coefficients = np.array([])
self.gradient_factors = np.array([])
self.order = []
self.d = 0
self.independent = independent
# Input type checking
if(d is None):
# If no dimensionality specified, get it from order parameter (must be able to call len).
if(type(order) in [list,tuple,np.ndarray]):
self.d = len(order)
self.order = order
else:
print("FourierBasis error: must either specify input dimension 'd' or order must provide a d-length list of coefficient order in `order'.", file = sys.stderr)
return
else:
# If dimensionality specified, confirm against 'order'.
# If 'order' is an integer not a list, assume that's the order in each of 'd' dimensions.
self.d = d
if(type(order) in [list,tuple,np.ndarray]):
if(d != len(order)):
print("FourierBasis error: parameter 'd' does not match length of coefficient order list `order'.", file = sys.stderr)
return
self.order = order
elif(type(order) != int):
print("FourierBasis error: parameter 'order' must be an integer, if not a list.", file = sys.stderr)
return
else:
# Order list should be 'd' copies of the integer in 'order'
self.order = [order]*d
# Generate coefficients
if(self.independent):
coeffs = []
coeffs.append(np.zeros([self.d]))
for i in range(0, self.d):
for c in range(0, self.order[i]):
v = np.zeros([self.d])
v[i] = c + 1
coeffs.append(v)
else:
coeffs = []
prods = [range(0, o+1) for o in self.order]
coeffs = [v for v in itertools.product(*prods)]
self.coefficients = np.array(coeffs)
with np.errstate(divide='ignore', invalid='ignore'):
self.gradient_factors = 1.0 / np.linalg.norm(self.coefficients, ord=2, axis=1)
self.gradient_factors[0] = 1.0 # Overwrite division by zero for function with all-zero coefficients.
def length(self):
"""Return the number of basis functions."""
return self.coefficients.shape[0]
def get_coefficients(self):
"""Returns an b x d numpy array of coefficients, where b
is the number of basis functions, and is the state dimension."""
return self.coefficients
def get_coefficient(self, function_no):
""" Returns the coefficients (a d-dimensional numpy array,
where d is the state dimensionality) for the individual
basis function specified by function_no."""
return self.coefficients[function_no, :]
def get_gradient_factors(self):
""" Returns a numpy array of the gradient correction terms for
all basis functions."""
return self.gradient_factors
def get_gradient_factor(self, function_no):
""" Returns the gradient correction term for the specific basis
function function_no."""
return self.gradient_factors[function_no]
def evaluate(self, state_vector):
"""
Computes basis function values at a given state.
Parameters
----------
state_vector : a list of d real-values
d is state dimensionality; each value should be in [0, 1]
If the list is not a numpy array, it is converted to one.
Returns
-------
numpy array
An n-dimensional numpy array containing a basis function
value for each of the n functions in the basis.
"""
# Type check state vector, try to convert if not ndarray.
if(type(state_vector) is not np.ndarray):
state_vector = np.array(state_vector)
# Dimension check state vector
state_shape = state_vector.shape
if(not ((state_shape == (self.d, )) or (state_shape == (self.d, 1)))):
print("FourierBasis warning: state vector shape is " + str(state_shape) + ", expecting " + str((self.d, )) + " or " + str((self.d, 1)), file = sys.stderr)
return None
# Bounds check state vector
if(np.min(state_vector) < 0.0):
print("FourierBasis warning: given state vector with entry less than 0 (" + str(np.min(state_vector)) + " at index " + str(np.argmin(state_vector)) + ").", file = sys.stderr)
if(np.max(state_vector) > 1.0):
print("FourierBasis warning: given state vector with entry greater than 1 (" + str(np.max(state_vector)) + " at index " + str(np.argmax(state_vector)) + ").", file = sys.stderr)
# Compute the Fourier Basis feature values
return np.cos(np.pi*np.dot(self.coefficients, state_vector))
if __name__ == "__main__":
start = timer()
fb = FourierBasis(order=7, d=7)
end = timer()
print("Constructed", fb.length(), "basis functions in", end-start, " seconds")
start = timer()
fb.evaluate([0.1]*7)
end = timer()
print(end - start, "seconds to evaluate", fb.length(), "basis functions")
start = timer()
fb.evaluate([0.3]*7)
end = timer()
print(end - start, "seconds to evaluate", fb.length(), "basis functions")
start = timer()
fb.evaluate([0.25]*7)
end = timer()
print(end - start, "seconds to evaluate", fb.length(), "basis functions")