From 7c11dba22e7b023a59626b3e00ce116722566b9b Mon Sep 17 00:00:00 2001 From: Jack Reeves Eyre Date: Wed, 17 Aug 2022 14:11:48 -0400 Subject: [PATCH 1/5] First attempt at new python feature. --- python/_bufrlib.pyf | 5 +++++ python/ncepbufr/__init__.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/python/_bufrlib.pyf b/python/_bufrlib.pyf index 40ee6f60..ca4efd2b 100644 --- a/python/_bufrlib.pyf +++ b/python/_bufrlib.pyf @@ -106,6 +106,11 @@ subroutine ufbint(lunin,usr,i1,i2,iret,str) ! in ufbint.f integer, intent(out) :: iret character*(*), intent(in) :: str end subroutine ufbint +subroutine readlc(lunit, chr, str) ! in readlc.f + integer,intent(in) :: lunit + character*(*), intent(out) :: chr + character*(*), intent(in) :: str +end subroutine readlc subroutine ufbqcd(lunit,nemo,qcd) ! in ufbqcd.f integer, intent(in) :: lunit character*(*), intent(in) :: nemo diff --git a/python/ncepbufr/__init__.py b/python/ncepbufr/__init__.py index dd8fb3d2..ec45b8be 100644 --- a/python/ncepbufr/__init__.py +++ b/python/ncepbufr/__init__.py @@ -469,6 +469,22 @@ def load_subset(self): if iret == 0: self.subset_loaded = True return iret + def read_long_string(self,mnemonic): + """ + Decode character string from the currently loaded message subset + using the specified mnemonic (a 'mnemonic' is simply a + descriptive, alphanumeric name for a data value, like + a key in a python dictionary). The mnemonic string + must be a single mnemonic only. + + Returns the character string, if found, or "MISSING" if not. 
+ """ + if not self.subset_loaded: + raise IOError('subset not loaded, call load_subset first') + if len(mnemonic.split()) > 1: + raise ValueError('only one mnemonic per call to read_long_string') + data = _bufrlib.readlc(self.lunit,data,mnemonic) + return data def read_subset(self,mnemonics,rep=False,seq=False,events=False): """ decode the data from the currently loaded message subset From d398a5e3bfe2607fb24309bc9fed5f03cd26dd8f Mon Sep 17 00:00:00 2001 From: Jack Reeves Eyre Date: Wed, 17 Aug 2022 17:29:53 -0400 Subject: [PATCH 2/5] Troubleshooting to get read_long_string working. --- python/_bufrlib.pyf | 2 +- python/ncepbufr/__init__.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/_bufrlib.pyf b/python/_bufrlib.pyf index ca4efd2b..7a8fa6d7 100644 --- a/python/_bufrlib.pyf +++ b/python/_bufrlib.pyf @@ -108,7 +108,7 @@ subroutine ufbint(lunin,usr,i1,i2,iret,str) ! in ufbint.f end subroutine ufbint subroutine readlc(lunit, chr, str) ! in readlc.f integer,intent(in) :: lunit - character*(*), intent(out) :: chr + character*120, intent(out) :: chr character*(*), intent(in) :: str end subroutine readlc subroutine ufbqcd(lunit,nemo,qcd) ! 
in ufbqcd.f diff --git a/python/ncepbufr/__init__.py b/python/ncepbufr/__init__.py index ec45b8be..e1c8c497 100644 --- a/python/ncepbufr/__init__.py +++ b/python/ncepbufr/__init__.py @@ -483,8 +483,8 @@ def read_long_string(self,mnemonic): raise IOError('subset not loaded, call load_subset first') if len(mnemonic.split()) > 1: raise ValueError('only one mnemonic per call to read_long_string') - data = _bufrlib.readlc(self.lunit,data,mnemonic) - return data + long_string = _bufrlib.readlc(self.lunit,mnemonic) + return str(long_string, encoding='ascii').strip() def read_subset(self,mnemonics,rep=False,seq=False,events=False): """ decode the data from the currently loaded message subset From 74c4776c9bcf52c1de607fb2a2a5d02f55612092 Mon Sep 17 00:00:00 2001 From: Jack Reeves Eyre Date: Mon, 22 Aug 2022 15:40:30 -0400 Subject: [PATCH 3/5] Adds more error handling and a simple test. Error handling now tries an alternative text decoding (extended ASCII) before raising an error. This should help with some French-language station names (and maybe other examples) with accents. --- python/ncepbufr/__init__.py | 24 +++++++++++++++++++++++- python/test/test.py | 21 +++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/python/ncepbufr/__init__.py b/python/ncepbufr/__init__.py index e1c8c497..7d66d97e 100644 --- a/python/ncepbufr/__init__.py +++ b/python/ncepbufr/__init__.py @@ -478,13 +478,35 @@ def read_long_string(self,mnemonic): must be a single mnemonic only. Returns the character string, if found, or "MISSING" if not. 
+ + Example: + + :::python + >>> bufr = ncepbufr.open(filename) + >>> while bufr.advance() == 0: + >>> while bufr.load_subset() == 0: + >>> st_name = bufr.read_long_string(mnemonic='STSN') """ if not self.subset_loaded: raise IOError('subset not loaded, call load_subset first') if len(mnemonic.split()) > 1: raise ValueError('only one mnemonic per call to read_long_string') long_string = _bufrlib.readlc(self.lunit,mnemonic) - return str(long_string, encoding='ascii').strip() + try: + result = str(long_string, encoding='ascii').strip() + except UnicodeDecodeError: + try: + if all([bt == int('0xff',16) for bt in long_string.strip()]): + # All values set to 255 for missing data. + result = 'MISSING' + else: + # Extended ASCII for Roman alphabet accents. + result = str(long_string, encoding='cp1252').strip() + except Exception as error: + print(f"An exception occurred {error}") + except Exception as error: + print(f"An exception occurred {error}") + return result def read_subset(self,mnemonics,rep=False,seq=False,events=False): """ decode the data from the currently loaded message subset diff --git a/python/test/test.py b/python/test/test.py index 5338090c..f27a8c59 100644 --- a/python/test/test.py +++ b/python/test/test.py @@ -187,3 +187,24 @@ np.testing.assert_array_almost_equal(oer_save.filled(), oer2.filled()) np.testing.assert_array_almost_equal(qcf_save.filled(), qcf2.filled()) bufr.close() + +# test reading long strings +bufr = ncepbufr.open('data/xx103') +test_station_names = ['BOUEE_LION', 'BOUEE_ANTILLES', + 'BOUEE_COTE D\'AZUR', + 'GULF OF MAINE', 'TENERIFE'] +test_report_ids = ['6100002', '4100300', '6100001', '4400005', '1300131'] +i_msg = 0 +while bufr.advance() == 0: + # Just read the first subset from each message. 
+ if bufr.load_subset() == 0: + stsn = bufr.read_long_string(mnemonic='STSN') + rpid = bufr.read_long_string(mnemonic='RPID') + assert stsn == test_station_names[i_msg] + assert rpid == test_report_ids[i_msg] + i_msg = i_msg + 1 + # only loop over first 5 subsets + if i_msg == 5: break +bufr.close() + + From e6f03fcc0a3d8641ad9d4c76dd419cac91a5d281 Mon Sep 17 00:00:00 2001 From: Jack Reeves Eyre Date: Tue, 23 Aug 2022 16:48:07 -0400 Subject: [PATCH 4/5] Reduces max length of read_long_string; adds documentation. The maximum length of string was set to 120 -- thought to be more than needed. This commit changes it to 64. Documentation improved by describing the methodology built into the Fortran "readlc" routine -- ability to retrieve one particular occurrence of a long string when multiple occur in the same subset. --- python/_bufrlib.pyf | 2 +- python/ncepbufr/__init__.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/python/_bufrlib.pyf b/python/_bufrlib.pyf index 7a8fa6d7..ba09d6a0 100644 --- a/python/_bufrlib.pyf +++ b/python/_bufrlib.pyf @@ -108,7 +108,7 @@ subroutine ufbint(lunin,usr,i1,i2,iret,str) ! in ufbint.f end subroutine ufbint subroutine readlc(lunit, chr, str) ! in readlc.f integer,intent(in) :: lunit - character*120, intent(out) :: chr + character*64, intent(out) :: chr character*(*), intent(in) :: str end subroutine readlc subroutine ufbqcd(lunit,nemo,qcd) ! in ufbqcd.f diff --git a/python/ncepbufr/__init__.py b/python/ncepbufr/__init__.py index 7d66d97e..1eec0e0a 100644 --- a/python/ncepbufr/__init__.py +++ b/python/ncepbufr/__init__.py @@ -475,7 +475,11 @@ def read_long_string(self,mnemonic): using the specified mnemonic (a 'mnemonic' is simply a descriptive, alphanumeric name for a data value, like a key in a python dictionary). The mnemonic string - must be a single mnemonic only. 
If the subset contains more + than one occurrence of the mnemonic, then can append '#X' to + the mnemonic to request the character string corresponding to + the Xth occurrence of the mnemonic, counting from the beginning + of the subset. Otherwise, X is assumed to be 1. Returns the character string, if found, or "MISSING" if not. From 6a5c6da348436d416429ea666610875c398cf4a7 Mon Sep 17 00:00:00 2001 From: Jeff Ator Date: Wed, 24 Aug 2022 18:44:39 +0000 Subject: [PATCH 5/5] minor updates to remove warnings for deprecated Python methods --- python/test/test.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/python/test/test.py b/python/test/test.py index f27a8c59..d1fefb47 100644 --- a/python/test/test.py +++ b/python/test/test.py @@ -11,7 +11,7 @@ while bufr.advance() == 0: # loop over messages. while bufr.load_subset() == 0: # loop over subsets in message. hdr = bufr.read_subset(hdstr).squeeze() - station_id = hdr[0].tostring() + station_id = hdr[0].tobytes() lon = hdr[1]; lat = hdr[2] station_type = int(hdr[4]) obs = bufr.read_subset(obstr) @@ -115,7 +115,7 @@ np.testing.assert_almost_equal(lon,-167.3253) obs_tst=np.array([1.4555e+02,1.4618e+02,2.1374e+02,2.4871e+02,2.4807e+02,2.3607e+02,\ 2.2802e+02,2.2255e+02,2.1699e+02,2.1880e+02,2.2440e+02,2.2970e+02,\ - 2.3407e+02,1.0000e+11,2.0008e+02],np.float) + 2.3407e+02,1.0000e+11,2.0008e+02],np.float64) np.testing.assert_array_almost_equal(obs,obs_tst) bufr.close() @@ -158,7 +158,7 @@ while bufr.advance() == 0: while bufr.load_subset() == 0: hdr = bufr.read_subset(hdstr).squeeze() - station_id = hdr[0].tostring() + station_id = hdr[0].tobytes() obs = bufr.read_subset(obstr) nlevs = obs.shape[-1] oer = bufr.read_subset(oestr) @@ -174,7 +174,7 @@ bufr.restore() bufr.load_subset() hdr = bufr.read_subset(hdstr).squeeze() -station_id = hdr[0].tostring() +station_id = hdr[0].tobytes() obs2 = bufr.read_subset(obstr) nlevs = obs2.shape[-1] oer2 = bufr.read_subset(oestr) @@ -206,5 +206,3 @@ # only 
loop over first 5 subsets if i_msg == 5: break bufr.close() - -