-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added process, test, and lookup zip creator code for glmcierra
- Loading branch information
Showing
10 changed files
with
506 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
118 changes: 118 additions & 0 deletions
118
mdx/granule_metadata_extractor/processing/process_glmcierra.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
from ..src.extract_netcdf_metadata import ExtractNetCDFMetadata | ||
import os | ||
import numpy as np | ||
from datetime import datetime, timedelta | ||
from netCDF4 import Dataset | ||
|
||
class ExtractGlmcierraMetadata(ExtractNetCDFMetadata):
    """
    Extract temporal and spatial metadata from glmcierra netCDF-4 granules.
    """

    def __init__(self, file_path):
        """
        :param file_path: path to the glmcierra granule to extract metadata from
        """
        #super().__init__(file_path)
        self.file_path = file_path
        # These 5 files have incorrect lat/lon info; they are assigned the
        # collection summary extent temporarily. After finishing in PROD,
        # come back to assign summary metadata to them.
        self.file_excluded = ['OR_GLM-L2-CIERRA-DB_GOES-EAST_s20192931845000.nc',
                             'OR_GLM-L2-CIERRA-DB_GOES-EAST_s20193132345000.nc',
                             'OR_GLM-L2-CIERRA-DB_GOES-WEST_s20203590215000.nc',
                             'OR_GLM-L2-CIERRA-DB_GOES-WEST_s20203591600000.nc',
                             'OR_GLM-L2-CIERRA-DB_GOES-WEST_s20210122000000.nc']
        self.fileformat = 'netCDF-4'

        # Extract time and space metadata from the nc file; close the
        # dataset even if extraction raises.
        dataset = Dataset(file_path)
        try:
            [self.minTime, self.maxTime, self.SLat, self.NLat,
             self.WLon, self.ELon] = self.get_variables_min_max(dataset, file_path)
        finally:
            dataset.close()

    def get_variables_min_max(self, datafile, filename):
        """
        Extract the temporal range and the lat/lon bounding extent.

        :param datafile: opened netCDF4 Dataset
        :param filename: file path (basename is checked against the
            excluded-file list)
        :return: [minTime, maxTime, minlat, maxlat, minlon, maxlon]
        """
        # Bounding extent from the flash coordinate variables.
        lats = np.array(datafile['FLASH_LAT'][:])
        lons = np.array(datafile['FLASH_LON'][:])

        maxlat, minlat, maxlon, minlon = [np.nanmax(lats),
                                          np.nanmin(lats),
                                          np.nanmax(lons),
                                          np.nanmin(lons)]
        if filename.split('/')[-1] in self.file_excluded:
            # Known-bad coordinates: assign the collection summary metadata
            # 'north': '57.267', 'south': '-57.312', 'east': '180.0', 'west': '-180.0'
            maxlat, minlat, maxlon, minlon = [57.267, -57.312, 180., -180.]

        # Temporal range comes from global attributes, e.g. "2019-10-20 18:45:00Z".
        minTime = datetime.strptime(datafile.TIME_COVERAGE_START, '%Y-%m-%d %H:%M:%SZ')
        maxTime = datetime.strptime(datafile.TIME_COVERAGE_END, '%Y-%m-%d %H:%M:%SZ')

        return minTime, maxTime, minlat, maxlat, minlon, maxlon

    def get_wnes_geometry(self, scale_factor=1.0, offset=0):
        """
        Extract the geometry (bounding box) from the netCDF file.

        :param scale_factor: in case it is not CF compliant we will need scale factor
        :param offset: data offset if the netCDF is not CF compliant
        :return: list of bounding box coordinates [west, north, east, south]
        """
        north, south, east, west = [round((x * scale_factor) + offset, 3) for x in
                                    [self.NLat, self.SLat, self.ELon, self.WLon]]
        return [self.convert_360_to_180(west), north, self.convert_360_to_180(east), south]

    def get_temporal(self, time_variable_key='time', units_variable='units', scale_factor=1.0,
                     offset=0,
                     date_format='%Y-%m-%dT%H:%M:%SZ'):
        """
        Format the already-extracted temporal range.

        The time/units/scale/offset parameters are unused here (the times
        were read from global attributes in __init__) but are kept for
        interface compatibility with the other extractors.

        :param time_variable_key: unused, kept for interface compatibility
        :param units_variable: unused, kept for interface compatibility
        :param scale_factor: unused, kept for interface compatibility
        :param offset: unused, kept for interface compatibility
        :param date_format: strftime format for the returned strings
        :return: (start_date, stop_date) as formatted strings
        """
        start_date = self.minTime.strftime(date_format)
        stop_date = self.maxTime.strftime(date_format)
        return start_date, stop_date

    def get_metadata(self, ds_short_name, format='netCDF-4', version='1', **kwargs):
        """
        Assemble the granule metadata dictionary.

        :param ds_short_name: collection short name
        :param format: unused, kept for interface compatibility
        :param version: collection version id
        :return: dict of granule metadata fields
        """
        data = dict()
        data['GranuleUR'] = os.path.basename(self.file_path)
        start_date, stop_date = self.get_temporal()
        data['ShortName'] = ds_short_name
        data['BeginningDateTime'], data['EndingDateTime'] = start_date, stop_date

        # Bounding coordinates are serialized as strings.
        geometry_list = self.get_wnes_geometry()
        data['WestBoundingCoordinate'], data['NorthBoundingCoordinate'], \
            data['EastBoundingCoordinate'], data['SouthBoundingCoordinate'] = list(
            str(x) for x in geometry_list)
        data['checksum'] = self.get_checksum()
        data['SizeMBDataGranule'] = str(round(self.get_file_size_megabytes(), 2))
        data['DataFormat'] = self.fileformat
        data['VersionId'] = version
        return data
|
||
|
||
if __name__ == '__main__':
    print('Extracting glmcierra Metadata')
    path_to_file = "../../test/fixtures/"
    # Bug fix: the original instantiated undefined
    # ExtractSbuceilimpactsMetadata (copy-paste from another extractor),
    # which raised NameError; use the class defined in this module.
    exnet = ExtractGlmcierraMetadata(path_to_file)
    metadata = exnet.get_metadata("test")
    print(metadata)
89 changes: 89 additions & 0 deletions
89
mdx/granule_metadata_extractor/src/helpers/creators/glmcierra.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
# create lookup zip for glmcierra | ||
# for all future collections | ||
from datetime import datetime, timedelta | ||
from utils.mdx import MDX | ||
import cProfile | ||
import time | ||
import math | ||
import re | ||
|
||
from netCDF4 import Dataset | ||
import numpy as np | ||
|
||
# Collection identifiers used by the MDX lookup-zip creation run.
short_name = "glmcierra"
provider_path = "glmcierra/"
file_type = "netCDF-4"

# Granules with known-bad lat/lon values; they receive the collection
# summary bounding box instead of extracted coordinates.
file_excluded = [
    'OR_GLM-L2-CIERRA-DB_GOES-EAST_s20192931845000.nc',
    'OR_GLM-L2-CIERRA-DB_GOES-EAST_s20193132345000.nc',
    'OR_GLM-L2-CIERRA-DB_GOES-WEST_s20203590215000.nc',
    'OR_GLM-L2-CIERRA-DB_GOES-WEST_s20203591600000.nc',
    'OR_GLM-L2-CIERRA-DB_GOES-WEST_s20210122000000.nc',
]
|
||
class MDXProcessing(MDX):
    """
    MDX driver that builds the lookup zip for the glmcierra collection.
    """
    # NOTE: the original defined an __init__ that only called
    # super().__init__() with no extra state; it was redundant and removed.

    def process(self, filename, file_obj_stream) -> dict:
        """
        Individual collection processing logic for spatial and temporal
        metadata extraction
        :param filename: name of file to process
        :type filename: str
        :param file_obj_stream: file object stream to be processed
        :type file_obj_stream: botocore.response.StreamingBody
        :return: dict of extracted temporal and spatial metadata
        """
        return self.get_nc_metadata(filename, file_obj_stream)

    def get_nc_metadata(self, filename, file_obj_stream):
        """
        Extract temporal and spatial metadata from netCDF-4 files
        :param filename: name of file to process
        :param file_obj_stream: file object stream to be processed
        :return: dict with start/end times, bounding box, and file format
        """
        print(filename)
        # Read the stream fully into an in-memory netCDF dataset.
        datafile = Dataset("in-mem-file", mode='r', memory=file_obj_stream.read())
        # Renamed from 'file_type' to avoid shadowing the module constant.
        ftype = datafile.file_format
        if ftype.startswith('NETCDF3'):
            granule_format = "netCDF-3"
        else:
            granule_format = "netCDF-4"

        lats = np.array(datafile['FLASH_LAT'][:])
        lons = np.array(datafile['FLASH_LON'][:])

        north, south, east, west = [np.nanmax(lats),
                                    np.nanmin(lats),
                                    np.nanmax(lons),
                                    np.nanmin(lons)]
        if filename.split('/')[-1] in file_excluded:
            # Known-bad coordinates: assign summary metadata
            # "north": 57.267, "south": -57.312, "east": 180.0, "west": -180.0
            north, south, east, west = [57.267, -57.312, 180.0, -180.0]

        # Temporal range from global attributes, e.g. "2019-10-20 18:45:00Z".
        start_time = datetime.strptime(datafile.TIME_COVERAGE_START, '%Y-%m-%d %H:%M:%SZ')
        end_time = datetime.strptime(datafile.TIME_COVERAGE_END, '%Y-%m-%d %H:%M:%SZ')
        datafile.close()
        return {
            "start": start_time,
            "end": end_time,
            "north": north,
            "south": south,
            "east": east,
            "west": west,
            "format": granule_format
        }

    def main(self):
        """Process the whole collection, then shut down the worker EC2."""
        # start_time = time.time()
        self.process_collection(short_name, provider_path)
        # elapsed_time = time.time() - start_time
        # print(f"Elapsed time in seconds: {elapsed_time}")
        self.shutdown_ec2()
|
||
|
||
if __name__ == '__main__':
    MDXProcessing().main()
    # To find which functions dominate processing time, run under the
    # profiler instead:
    # cProfile.run('MDXProcessing().main()', sort='tottime')
48 changes: 48 additions & 0 deletions
48
mdx/granule_metadata_extractor/src/helpers/creators/glmcierra_update_lookup.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
from zipfile import ZipFile | ||
import json | ||
import os | ||
import pathlib | ||
|
||
# Summary metadata for the collection:
# "north": 57.267, "south": -57.312, "east": 180.0, "west": -180.0

# Granules with known-bad lat/lon values in the existing lookup zip.
file_excluded = ['OR_GLM-L2-CIERRA-DB_GOES-EAST_s20192931845000.nc',
                 'OR_GLM-L2-CIERRA-DB_GOES-EAST_s20193132345000.nc',
                 'OR_GLM-L2-CIERRA-DB_GOES-WEST_s20203590215000.nc',
                 'OR_GLM-L2-CIERRA-DB_GOES-WEST_s20203591600000.nc',
                 'OR_GLM-L2-CIERRA-DB_GOES-WEST_s20210122000000.nc']


# Rename the existing lookup zip aside so the corrected archive can be
# written back to the original, script-relative location.
# (Removed the pointless f-string prefixes: the paths have no placeholders.)
lookup_zip_path = os.path.join(pathlib.Path(__file__).parent.absolute(),
                               "../glmcierra.zip")
lookup_zip_path_orig = os.path.join(pathlib.Path(__file__).parent.absolute(),
                                    "../glmcierra_orig.zip")
os.rename(lookup_zip_path, lookup_zip_path_orig)

# Read both JSON members with a single archive open (the original opened
# the same zip twice).
with ZipFile(lookup_zip_path_orig) as lookup_zip_orig:
    with lookup_zip_orig.open("lookup.json") as collection_lookup_orig:
        metadata = json.load(collection_lookup_orig)
    with lookup_zip_orig.open("summary.json") as summary_meta_orig:
        summary_meta = json.load(summary_meta_orig)

# Example lookup entry:
# {"OR_GLM-L2-CIERRA-DB_GOES-EAST_s20170122300000.nc": {"start": "2017-01-12T23:00:00Z", "end": "2017-01-12T23:14:59Z", "north": "56.429", "south": "-56.141", "east": "-33.987", "west": "-148.712", "format": "netCDF-4", "sizeMB": 0.92}

# Overwrite the bad granules' bounding boxes with the summary extent.
for key in file_excluded:
    metadata[key]["north"] = "57.267"
    metadata[key]["south"] = "-57.312"
    metadata[key]["east"] = "180.0"
    metadata[key]["west"] = "-180.0"

with open('./lookup.json', 'w') as fp:
    json.dump(metadata, fp)
with open('./summary.json', 'w') as fp:
    json.dump(summary_meta, fp)

# Recreate the lookup zip at its original location. The original wrote to
# '../glmcierra.zip' relative to the CWD, which only matched the renamed
# file's location when the script was run from its own directory; reuse the
# script-relative path computed above so the result is CWD-independent.
# (Equivalent to the command line "zip glmcierra.zip lookup.json summary.json".)
with ZipFile(lookup_zip_path, 'w') as myzip:
    myzip.write('lookup.json')
    myzip.write('summary.json')
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Oops, something went wrong.