forked from Unidata/netcdf4-python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
netCDF4_utils.py
383 lines (330 loc) · 13.5 KB
/
netCDF4_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
import numpy as np
from numpy import ma
import warnings
def _sortbylist(A,B):
# sort one list (A) using the values from another list (B)
return [A[i] for i in sorted(range(len(A)), key=B.__getitem__)]
def _find_dim(grp, dimname):
# find Dimension instance given group and name.
# look in current group, and parents.
group = grp
dim = None
while 1:
try:
dim = group.dimensions[dimname]
break
except:
try:
group = group.parent
except:
raise ValueError("cannot find dimension %s in this group or parent groups" % dimname)
return dim
def _walk_grps(topgrp):
"""Iterate through all (sub-) groups of topgrp, similar to os.walktree.
"""
grps = topgrp.groups.values()
yield grps
for grp in topgrp.groups.values():
for children in _walk_grps(grp):
yield children
def _quantize(data,least_significant_digit):
"""
quantize data to improve compression. data is quantized using
around(scale*data)/scale, where scale is 2**bits, and bits is determined
from the least_significant_digit. For example, if
least_significant_digit=1, bits will be 4.
"""
precision = pow(10.,-least_significant_digit)
exp = np.log10(precision)
if exp < 0:
exp = int(np.floor(exp))
else:
exp = int(np.ceil(exp))
bits = np.ceil(np.log2(pow(10.,-exp)))
scale = pow(2.,bits)
datout = np.around(scale*data)/scale
if ma.isMA(datout):
datout.set_fill_value(data.fill_value)
return datout
else:
return datout
def _StartCountStride(elem, shape, dimensions=None, grp=None, datashape=None):
"""Return start, count, stride and indices needed to store/extract data
into/from a netCDF variable.
This function is used to convert a NumPy index into a form that is
compatible with the nc_get_vars function. Specifically, it needs
to interpret slices, ellipses, sequences of integers as well as
sequences of booleans.
Note that all the fancy indexing tricks
implemented in NumPy are not supported. In particular, multidimensional
indexing is not supported and will raise an IndexError. Note also that
boolean indexing does not work as in NumPy. In NumPy, booleans arrays
behave identically to integer indices. For netCDF variables, we thought
it would be useful to use a different logic, namely dimension independence.
What this means is that you can do:
>>> v[lat>60, lon<180, :]
to fetch the elements of v obeying conditions on latitude and longitude.
This function is used both by the __setitem__ and __getitem__ method of
the Variable class. Although the behavior is similar in both cases, there
are some differences to be noted.
Parameters
----------
elem : tuple of integer, slice, ellipsis or sequence of integers.
The indexing information for the netCDF Variable: Variable[elem]
shape : tuple
The current shape of the netCDF variable.
dimensions : sequence
The name of the dimensions. This is only useful to find out
whether or not some dimensions are unlimited. Only needed within
__setitem__.
grp : netCDF Group
The netCDF group to which the variable being set belongs to.
Only needed within __setitem__.
datashape : sequence
The shape of the data that is being stored. Only needed within
__setitem__.
Returns
-------
start : ndarray (..., n)
A starting indices array of dimension n+1. The first n
dimensions identify different independent data chunks. The last dimension
can be read as the starting indices.
count : ndarray (..., n)
An array of dimension (n+1) storing the number of elements to get.
stride : ndarray (..., n)
An array of dimension (n+1) storing the steps between each datum.
indices : ndarray (..., n)
An array storing the indices describing the location of the
data chunk in the target/source array (__getitem__/__setitem__).
Notes:
netCDF data is accessed via the function:
nc_get_vars(grpid, varid, start, count, stride, data)
Assume that the variable has dimension n, then
start is a n-tuple that contains the indices at the beginning of data chunk.
count is a n-tuple that contains the number of elements to be accessed.
stride is a n-tuple that contains the step length between each element.
"""
# Adapted from pycdf (http://pysclint.sourceforge.net/pycdf)
# by Andre Gosselin..
# Modified by David Huard to handle efficiently fancy indexing with
# sequences of integers or booleans.
nDims = len(shape)
if nDims == 0:
nDims = 1
shape = (1,)
# When a single array or (non-tuple) sequence of integers is given
# as a slice, assume it applies to the first dimension,
# and use ellipsis for remaining dimensions.
if np.iterable(elem):
if type(elem) == np.ndarray or (type(elem) != tuple and \
np.array([_is_int(e) for e in elem]).all()):
elem = [elem]
for n in range(len(elem)+1,nDims+1):
elem.append(slice(None,None,None))
else: # Convert single index to sequence
elem = [elem]
hasEllipsis = 0
newElem = []
for e in elem:
# Raise error if multidimensional indexing is used.
if np.ndim(e) > 1:
raise IndexError("Index cannot be multidimensional.")
# Replace ellipsis with slices.
if type(e) == type(Ellipsis):
if hasEllipsis:
raise IndexError("At most one ellipsis allowed in a slicing expression")
# The ellipsis stands for the missing dimensions.
newElem.extend((slice(None, None, None),) * (nDims - len(elem) + 1))
# Replace boolean array with slice object if possible.
elif getattr(getattr(e, 'dtype', None), 'kind', None) == 'b':
el = e.tolist()
if any(el):
start = el.index(True)
el.reverse()
stop = len(el)-el.index(True)
step = False
if e[start:stop].all():
step = 1
else:
n1 = start+1
ee = e[n1]
estart = e[start]
while ee != estart:
n1 = n1 + 1
ee = e[n1]
step = n1-start
# check to make sure e[start:stop:step] are all True,
# and other elements in e[start:stop] are all False.
ii = range(start,stop,step)
for i in range(start,stop):
if i not in ii:
if e[i]: step = False
else:
if not e[i]: step = False
if step: # it step False, can't convert to slice.
newElem.append(slice(start,stop,step))
else:
newElem.append(e)
else:
newElem.append(slice(0,0))
# Replace sequence of indices with slice object if possible.
elif np.iterable(e) and len(e) > 1:
start = e[0]
stop = e[-1]+1
step = e[1]-e[0]
try:
ee = range(start,stop,step)
except ValueError: # start, stop or step is not valid for a range
ee = False
if ee and len(e) == len(ee) and (e == np.arange(start,stop,step)).all():
newElem.append(slice(start,stop,step))
else:
newElem.append(e)
else:
newElem.append(e)
elem = newElem
# If slice doesn't cover all dims, assume ellipsis for rest of dims.
if len(elem) < nDims:
for n in range(len(elem)+1,nDims+1):
elem.append(slice(None,None,None))
# make sure there are not too many dimensions in slice.
if len(elem) > nDims:
raise ValueError("slicing expression exceeds the number of dimensions of the variable")
# Compute the dimensions of the start, count, stride and indices arrays.
# The number of elements in the first n dimensions corresponds to the
# number of times the _get method will be called.
sdim = []
ind_dim = None
for i, e in enumerate(elem):
# Slices
if type(e) == slice:
sdim.append(1)
# Booleans --- Same shape as data along corresponding dimension
elif getattr(getattr(e, 'dtype', None), 'kind', None) == 'b':
if shape[i] != len(e):
msg="""
Boolean array must have the same shape as the data along this dimension."""
raise IndexError(msg)
sdim.append(e.sum())
# Sequence of indices
# If multiple sequences are used, they must have the same length.
elif np.iterable(e):
if ind_dim is None:
sdim.append(np.alen(e))
ind_dim = i
elif np.alen(e) == 1:
sdim.append(1)
elif np.alen(e) == sdim[ind_dim]:
sdim.append(np.alen(e))
else:
raise IndexError("Indice mismatch. Indices must have the same length.")
# Scalar
else:
sdim.append(1)
# Create the start, count, stride and indices arrays.
sdim.append(max(nDims, 1))
start = np.empty(sdim, dtype=int)
count = np.empty(sdim, dtype=int)
stride = np.empty(sdim, dtype=int)
indices = np.empty(sdim, dtype=object)
for i, e in enumerate(elem):
# if dimensions and grp are given, set unlim flag for this dimension.
if (dimensions is not None and grp is not None) and len(dimensions):
dimname = dimensions[i]
# is this dimension unlimited?
# look in current group, and parents for dim.
dim = _find_dim(grp, dimname)
unlim = dim.isunlimited()
else:
unlim = False
# SLICE #
if type(e) == slice:
# determine length parameter for slice.indices.
# shape[i] can be zero for unlim dim that hasn't been written to
# yet.
# length of slice may be longer than current shape
# if dimension is unlimited.
if unlim and e.stop is not None and e.stop > shape[i]:
length = e.stop
elif unlim and e.stop is None and datashape != ():
if e.start is None:
length = datashape[i]
else:
length = e.start+datashape[i]
else:
if unlim and datashape == () and len(dim) == 0:
# writing scalar along unlimited dimension using slicing
# syntax (var[:] = 1, when var.shape = ())
length = 1
else:
length = shape[i]
beg, end, inc = e.indices(length)
n = len(range(beg,end,inc))
start[...,i] = beg
count[...,i] = n
stride[...,i] = inc
indices[...,i] = slice(None)
# STRING #
elif type(e) is str:
raise IndexError("Index cannot be a string.")
# ITERABLE #
elif np.iterable(e) and np.array(e).dtype.kind in 'ib': # Sequence of integers or booleans
# BOOLEAN ARRAY #
if type(e) == np.ndarray and e.dtype.kind == 'b':
e = np.arange(len(e))[e]
# Originally, I thought boolean indexing worked differently than
# integer indexing, namely that we could select the rows and columns
# independently.
start[...,i] = np.apply_along_axis(lambda x: np.array(e)*x, i, np.ones(sdim[:-1]))
indices[...,i] = np.apply_along_axis(lambda x: np.arange(sdim[i])*x, i, np.ones(sdim[:-1], int))
# Sequence of INTEGER INDICES
else:
start[...,i] = np.apply_along_axis(lambda x: np.array(e)*x, ind_dim, np.ones(sdim[:-1]))
if i == ind_dim:
indices[...,i] = np.apply_along_axis(lambda x: np.arange(sdim[i])*x, ind_dim, np.ones(sdim[:-1], int))
else:
indices[...,i] = -1
count[...,i] = 1
stride[...,i] = 1
# SCALAR INTEGER #
elif np.alen(e)==1 and np.dtype(type(e)).kind == 'i':
if e >= 0:
start[...,i] = e
elif e < 0 and (-e <= shape[i]) :
start[...,i] = e+shape[i]
else:
raise IndexError("Index out of range")
count[...,i] = 1
stride[...,i] = 1
indices[...,i] = -1 # Use -1 instead of 0 to indicate that
# this dimension shall be squeezed.
return start, count, stride, indices#, out_shape
def _out_array_shape(count):
"""Return the output array shape given the count array created by getStartCountStride"""
s = list(count.shape[:-1])
out = []
for i, n in enumerate(s):
if n == 1:
c = count[..., i].ravel()[0] # All elements should be identical.
out.append(c)
else:
out.append(n)
return out
def _is_container(a):
# is object container-like? (can test for
# membership with "is in", but not a string)
try: 1 in a
except: return False
if type(a) == type(basestring): return False
return True
def _is_int(a):
try:
return int(a) == a
except:
return False
def _tostr(s):
try:
ss = str(s)
except:
ss = s
return ss