Skip to content

Commit

Permalink
Merge pull request #228 from JackReevesEyre/jre_py_long_strings
Browse files Browse the repository at this point in the history
Add python read_long_string function
  • Loading branch information
jbathegit authored Aug 25, 2022
2 parents 8519a73 + 6a5c6da commit 0997671
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 4 deletions.
5 changes: 5 additions & 0 deletions python/_bufrlib.pyf
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ subroutine ufbint(lunin,usr,i1,i2,iret,str) ! in ufbint.f
integer, intent(out) :: iret
character*(*), intent(in) :: str
end subroutine ufbint
! Signature for the readlc wrapper (Fortran source: readlc.f).
! Because chr is declared intent(out), the Python-side call passes only
! (lunit, str) and receives chr as the return value.
subroutine readlc(lunit, chr, str) ! in readlc.f
! lunit: input integer (presumably the Fortran logical unit of the open
!        BUFR file - TODO confirm against readlc.f).
integer,intent(in) :: lunit
! chr: output string, declared here with a fixed length of 64 characters.
character*64, intent(out) :: chr
! str: input string of any length; the Python wrapper passes the mnemonic
!      (optionally with a #X occurrence suffix) in this argument.
character*(*), intent(in) :: str
end subroutine readlc
subroutine ufbqcd(lunit,nemo,qcd) ! in ufbqcd.f
integer, intent(in) :: lunit
character*(*), intent(in) :: nemo
Expand Down
42 changes: 42 additions & 0 deletions python/ncepbufr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,48 @@ def load_subset(self):
if iret == 0:
self.subset_loaded = True
return iret
def read_long_string(self,mnemonic):
    """
    Decode a character string from the currently loaded message subset
    using the specified mnemonic (a 'mnemonic' is simply a
    descriptive, alphanumeric name for a data value, like
    a key in a python dictionary).

    The mnemonic string must contain a single mnemonic only. If the
    subset contains more than one occurrence of the mnemonic, '#X' can
    be appended to the mnemonic to request the character string
    corresponding to the Xth occurrence of the mnemonic, counting from
    the beginning of the subset. Otherwise, X is assumed to be 1.

    Returns the character string (with surrounding whitespace removed),
    or "MISSING" if every byte of the value is set to 255. Bytes that
    are not plain ASCII are decoded as cp1252; any byte that cp1252
    does not define is returned as U+FFFD (the Unicode replacement
    character) rather than causing a failure.

    Raises IOError if no subset has been loaded, and ValueError if more
    than one mnemonic is given.

    Example:

        :::python
        >>> bufr = ncepbufr.open(filename)
        >>> while bufr.advance() == 0:
        >>>     while bufr.load_subset() == 0:
        >>>         st_name = bufr.read_long_string(mnemonic='STSN')
    """
    if not self.subset_loaded:
        raise IOError('subset not loaded, call load_subset first')
    if len(mnemonic.split()) > 1:
        raise ValueError('only one mnemonic per call to read_long_string')
    long_string = _bufrlib.readlc(self.lunit,mnemonic)
    # Common case: the value is plain ASCII.
    try:
        return str(long_string, encoding='ascii').strip()
    except UnicodeDecodeError:
        pass
    # All values set to 255 for missing data.
    if all(bt == 0xff for bt in long_string.strip()):
        return 'MISSING'
    # Extended ASCII for Roman alphabet accents. A handful of byte values
    # (0x81, 0x8d, 0x8f, 0x90, 0x9d) are undefined in cp1252; substitute
    # U+FFFD for those so the caller always gets a string back instead of
    # an unrelated UnboundLocalError.
    return str(long_string, encoding='cp1252', errors='replace').strip()
def read_subset(self,mnemonics,rep=False,seq=False,events=False):
"""
decode the data from the currently loaded message subset
Expand Down
27 changes: 23 additions & 4 deletions python/test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
while bufr.advance() == 0: # loop over messages.
while bufr.load_subset() == 0: # loop over subsets in message.
hdr = bufr.read_subset(hdstr).squeeze()
station_id = hdr[0].tostring()
station_id = hdr[0].tobytes()
lon = hdr[1]; lat = hdr[2]
station_type = int(hdr[4])
obs = bufr.read_subset(obstr)
Expand Down Expand Up @@ -115,7 +115,7 @@
np.testing.assert_almost_equal(lon,-167.3253)
obs_tst=np.array([1.4555e+02,1.4618e+02,2.1374e+02,2.4871e+02,2.4807e+02,2.3607e+02,\
2.2802e+02,2.2255e+02,2.1699e+02,2.1880e+02,2.2440e+02,2.2970e+02,\
2.3407e+02,1.0000e+11,2.0008e+02],np.float)
2.3407e+02,1.0000e+11,2.0008e+02],np.float64)
np.testing.assert_array_almost_equal(obs,obs_tst)
bufr.close()

Expand Down Expand Up @@ -158,7 +158,7 @@
while bufr.advance() == 0:
while bufr.load_subset() == 0:
hdr = bufr.read_subset(hdstr).squeeze()
station_id = hdr[0].tostring()
station_id = hdr[0].tobytes()
obs = bufr.read_subset(obstr)
nlevs = obs.shape[-1]
oer = bufr.read_subset(oestr)
Expand All @@ -174,7 +174,7 @@
bufr.restore()
bufr.load_subset()
hdr = bufr.read_subset(hdstr).squeeze()
station_id = hdr[0].tostring()
station_id = hdr[0].tobytes()
obs2 = bufr.read_subset(obstr)
nlevs = obs2.shape[-1]
oer2 = bufr.read_subset(oestr)
Expand All @@ -187,3 +187,22 @@
np.testing.assert_array_almost_equal(oer_save.filled(), oer2.filled())
np.testing.assert_array_almost_equal(qcf_save.filled(), qcf2.filled())
bufr.close()

# Test reading long strings: check the station name (STSN) and report id
# (RPID) of the first subset of each of the first five messages.
test_station_names = ['BOUEE_LION', 'BOUEE_ANTILLES',
                      'BOUEE_COTE D\'AZUR',
                      'GULF OF MAINE', 'TENERIFE']
test_report_ids = ['6100002', '4100300', '6100001', '4400005', '1300131']
bufr = ncepbufr.open('data/xx103')
try:
    i_msg = 0
    # Stop advancing as soon as every expected value has been checked.
    while i_msg < len(test_station_names) and bufr.advance() == 0:
        # Just read the first subset from each message.
        if bufr.load_subset() == 0:
            stsn = bufr.read_long_string(mnemonic='STSN')
            rpid = bufr.read_long_string(mnemonic='RPID')
            assert stsn == test_station_names[i_msg]
            assert rpid == test_report_ids[i_msg]
            i_msg = i_msg + 1
    # Guard against a vacuous pass: without this check the test succeeds
    # even if the file yields fewer messages than expected (or none).
    assert i_msg == len(test_station_names)
finally:
    # Release the file even when an assertion above fails.
    bufr.close()

0 comments on commit 0997671

Please sign in to comment.