def read_long_string(self, mnemonic):
    """
    Decode a character string from the currently loaded message subset
    using the specified mnemonic (a 'mnemonic' is simply a
    descriptive, alphanumeric name for a data value, like
    a key in a python dictionary).

    The mnemonic string must contain a single mnemonic only.  If the
    subset contains more than one occurrence of the mnemonic, you can
    append '#X' to the mnemonic to request the character string
    corresponding to the Xth occurrence of the mnemonic, counting from
    the beginning of the subset.  Otherwise, X is assumed to be 1.

    Returns the character string with surrounding whitespace removed,
    or "MISSING" when every byte of the decoded value is 0xff (the
    value used to flag missing data).

    Raises IOError if no subset is loaded, and ValueError if more than
    one mnemonic is given.

    Example:

        :::python
        >>> bufr = ncepbufr.open(filename)
        >>> while bufr.advance() == 0:
        >>>     while bufr.load_subset() == 0:
        >>>         st_name = bufr.read_long_string(mnemonic='STSN')
    """
    if not self.subset_loaded:
        raise IOError('subset not loaded, call load_subset first')
    if len(mnemonic.split()) > 1:
        raise ValueError('only one mnemonic per call to read_long_string')
    # The f2py signature for readlc declares its output as a fixed-width
    # ``character*64, intent(out)`` buffer, hence the strip() calls below.
    raw = _bufrlib.readlc(self.lunit, mnemonic)
    try:
        # Common case: plain ASCII text.
        return str(raw, encoding='ascii').strip()
    except UnicodeDecodeError:
        # At least one byte is outside ASCII; classify it below.
        pass
    if all(byte == 0xff for byte in raw.strip()):
        # All values set to 255 for missing data.
        return 'MISSING'
    # Extended ASCII for Roman alphabet accents.  A few byte values are
    # undefined in cp1252; substitute them instead of failing, so this
    # method always returns a string rather than dying on an unbound
    # result as it did when the decode error was only printed.
    return str(raw, encoding='cp1252', errors='replace').strip()
# Check read_long_string against known station names and report ids,
# using only the first subset of each of the leading messages.
expected_station_names = [
    'BOUEE_LION',
    'BOUEE_ANTILLES',
    'BOUEE_COTE D\'AZUR',
    'GULF OF MAINE',
    'TENERIFE',
]
expected_report_ids = ['6100002', '4100300', '6100001', '4400005', '1300131']
bufr = ncepbufr.open('data/xx103')
n_checked = 0
while bufr.advance() == 0:
    # A message whose first subset fails to load is passed over
    # without counting towards the five comparisons.
    if bufr.load_subset() != 0:
        continue
    station_name = bufr.read_long_string(mnemonic='STSN')
    report_id = bufr.read_long_string(mnemonic='RPID')
    assert station_name == expected_station_names[n_checked]
    assert report_id == expected_report_ids[n_checked]
    n_checked += 1
    # Stop once five subsets have been compared.
    if n_checked == 5:
        break
bufr.close()