Merge branch 'master' into update-pyhdf-version
DavisGauntUAH committed Jun 28, 2024
2 parents ed1e8d1 + b91b900 commit 039c73f
Showing 12 changed files with 990 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Dockerfile
@@ -4,7 +4,7 @@ ARG stage

COPY requirements*.txt /tmp/

-RUN pip install -r /tmp/requirements.txt --target "${LAMBDA_TASK_ROOT}"
+RUN pip install --upgrade --force-reinstall -r /tmp/requirements.txt --target "${LAMBDA_TASK_ROOT}"

# Only if stage is other than dev
ADD mdx ${LAMBDA_TASK_ROOT}
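
Presumably, the new flags address pip's behavior with --target: pip refuses to replace a package directory that already exists under the target path unless --upgrade is given, and --force-reinstall makes pip reinstall pinned requirements even when the same distribution is already present — which matters when bumping a pinned dependency such as pyhdf.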
Binary file not shown.
89 changes: 89 additions & 0 deletions mdx/granule_metadata_extractor/src/helpers/creators/cossirimpacts.py
@@ -0,0 +1,89 @@
# Create lookup zip for cossirimpacts
# for all future collections
from datetime import datetime, timedelta
from utils.mdx import MDX
import cProfile  # only used by the commented-out profiling hook at the bottom
import time      # only used by the commented-out timing code in main()

from netCDF4 import Dataset
import numpy as np

short_name = "cossirimpacts"
provider_path = "cossirimpacts/"
file_type = "netCDF-4"


class MDXProcessing(MDX):

    def __init__(self):
        super().__init__()

    def process(self, filename, file_obj_stream) -> dict:
        """
        Individual collection processing logic for spatial and temporal
        metadata extraction
        :param filename: name of file to process
        :type filename: str
        :param file_obj_stream: file object stream to be processed
        :type file_obj_stream: botocore.response.StreamingBody
        """
        return self.get_nc_metadata(filename, file_obj_stream)

    def get_nc_metadata(self, filename, file_obj_stream):
        """
        Extract temporal and spatial metadata from netCDF-4 files
        """
        nc = Dataset("in-mem-file", mode='r', memory=file_obj_stream.read())
        lat = np.array(nc['Latitude'])
        lon = np.array(nc['Longitude'])
        year = np.array(nc['Year'])
        mon = np.array(nc['Month'])
        day = np.array(nc['DayOfMonth'])
        hr = np.array(nc['Hour'])
        mn = np.array(nc['Minute'])
        sec = np.array(nc['Second'])

        # Set missing (-999.0 fill) values in lat and lon to np.nan
        lat[lat == -999.0] = np.nan
        lon[lon == -999.0] = np.nan
        north, south, east, west = [np.nanmax(lat), np.nanmin(lat),
                                    np.nanmax(lon), np.nanmin(lon)]

        start_time, end_time = [datetime(2100, 1, 1), datetime(1900, 1, 1)]
        for i in range(sec.shape[0]):
            for j in range(sec.shape[1]):
                if int(year[i, j]) != -999:
                    total_sec = int(hr[i, j]*3600 + mn[i, j]*60 + sec[i, j])
                    dt = datetime(int(year[i, j]), int(mon[i, j]),
                                  int(day[i, j])) + \
                        timedelta(seconds=total_sec)
                    start_time = min(dt, start_time)
                    end_time = max(dt, end_time)

        nc.close()
        return {
            "start": start_time,
            "end": end_time,
            "north": north,
            "south": south,
            "east": east,
            "west": west,
            "format": file_type
        }

    def main(self):
        # start_time = time.time()
        self.process_collection(short_name, provider_path)
        # elapsed_time = time.time() - start_time
        # print(f"Elapsed time in seconds: {elapsed_time}")
        self.shutdown_ec2()


if __name__ == '__main__':
    MDXProcessing().main()
    # The below can be used to run a profiler and see which functions are
    # taking the most time to process
    # cProfile.run('MDXProcessing().main()', sort='tottime')
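
For local testing, a sketch along the following lines can exercise get_nc_metadata without S3. It is not part of this commit: the synthetic swath values and the io.BytesIO stand-in for botocore's StreamingBody are illustrative assumptions, and MDXProcessing and file_type refer to the module above.

# Hypothetical local harness for get_nc_metadata -- not part of this commit.
# It writes a tiny netCDF-4 swath with one -999.0 fill cell and feeds the
# raw bytes through io.BytesIO, which exposes the same .read() interface
# as botocore's StreamingBody.
import io
import os
import tempfile

from netCDF4 import Dataset


def make_test_stream():
    tmp = tempfile.NamedTemporaryFile(suffix=".nc", delete=False)
    tmp.close()
    nc = Dataset(tmp.name, mode="w", format="NETCDF4")
    nc.createDimension("along", 2)
    nc.createDimension("across", 2)
    for name in ("Latitude", "Longitude", "Year", "Month",
                 "DayOfMonth", "Hour", "Minute", "Second"):
        nc.createVariable(name, "f8", ("along", "across"))
    nc["Latitude"][:] = [[38.0, 38.1], [-999.0, 38.2]]
    nc["Longitude"][:] = [[-75.0, -74.9], [-999.0, -74.8]]
    nc["Year"][:] = [[2023.0, 2023.0], [-999.0, 2023.0]]
    nc["Month"][:] = 1.0
    nc["DayOfMonth"][:] = 19.0
    nc["Hour"][:] = [[5.0, 5.0], [0.0, 6.0]]
    nc["Minute"][:] = 40.0
    nc["Second"][:] = [[53.0, 54.0], [0.0, 10.0]]
    nc.close()
    with open(tmp.name, "rb") as f:
        data = f.read()
    os.remove(tmp.name)
    return io.BytesIO(data)


# get_nc_metadata never touches self, so it can be called unbound here to
# sidestep whatever AWS setup MDX.__init__ performs.
meta = MDXProcessing.get_nc_metadata(None, "synthetic.nc", make_test_stream())
print(meta)  # expects start 05:40:53, end 06:40:10, bbox over non-fill cells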
81 changes: 81 additions & 0 deletions mdx/granule_metadata_extractor/src/helpers/creators/er2navimpacts.py
@@ -0,0 +1,81 @@
# Create lookup zip for er2navimpacts
# for all future collections
from datetime import datetime, timedelta
from utils.mdx import MDX
import cProfile  # only used by the commented-out profiling hook at the bottom
import time      # only used by the commented-out timing code in main()

short_name = "er2navimpacts"
provider_path = "er2navimpacts/fieldCampaigns/impacts/ER2Nav/data/"
file_type = "ASCII"


class MDXProcessing(MDX):

    def __init__(self):
        super().__init__()

    def process(self, filename, file_obj_stream) -> dict:
        """
        Individual collection processing logic for spatial and temporal
        metadata extraction
        :param filename: name of file to process
        :type filename: str
        :param file_obj_stream: file object stream to be processed
        :type file_obj_stream: botocore.response.StreamingBody
        """
        return self.read_metadata_ascii(filename, file_obj_stream)

    def read_metadata_ascii(self, filename, file_obj_stream):
        """
        Extract temporal and spatial metadata from ASCII files
        """
        print(filename)
        file_lines = []
        for encoded_line in file_obj_stream.iter_lines():
            file_lines.append(encoded_line.decode("utf-8"))

        num_header_lines = int(file_lines[0].split(',')[0])
        databuf = file_lines[num_header_lines:]
        minTime, maxTime, minlat, maxlat, minlon, maxlon = [
            datetime(2100, 1, 1), datetime(1900, 1, 1),
            90.0, -90.0, 180.0, -180.0]
        year = int(filename.split('/')[-1].split('_')[3][0:4])
        for i in range(len(databuf)):
            tkn = databuf[i].split(',')
            sec, doy, lat, lon = [int(tkn[0]), int(tkn[1]),
                                  float(tkn[2]), float(tkn[3])]
            dt = datetime(year, 1, 1) + timedelta(seconds=sec) + \
                timedelta(days=doy - 1)
            minTime = min(minTime, dt)
            maxTime = max(maxTime, dt)
            if lat != -9999.0 and lon != -9999.0:
                maxlat = max(maxlat, lat)
                minlat = min(minlat, lat)
                maxlon = max(maxlon, lon)
                minlon = min(minlon, lon)

        return {
            "start": minTime,
            "end": maxTime,
            "north": maxlat,
            "south": minlat,
            "east": maxlon,
            "west": minlon,
            "format": file_type
        }

    def main(self):
        # start_time = time.time()
        self.process_collection(short_name, provider_path)
        # elapsed_time = time.time() - start_time
        # print(f"Elapsed time in seconds: {elapsed_time}")
        self.shutdown_ec2()


if __name__ == '__main__':
    MDXProcessing().main()
    # The below can be used to run a profiler and see which functions are
    # taking the most time to process
    # cProfile.run('MDXProcessing().main()', sort='tottime')
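
As a sanity check on the timestamp arithmetic above (seconds-of-day plus day-of-year minus one, added onto January 1), here is a small worked example with made-up values rather than real ER-2 nav data:

# Illustrative values: seconds-of-day 200 on day-of-year 19 of 2023
# should land on 2023-01-19 00:03:20.
from datetime import datetime, timedelta

year, sec, doy = 2023, 200, 19
dt = datetime(year, 1, 1) + timedelta(seconds=sec) + timedelta(days=doy - 1)
assert dt == datetime(2023, 1, 19, 0, 3, 20)
print(dt)  # 2023-01-19 00:03:20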
107 changes: 107 additions & 0 deletions mdx/granule_metadata_extractor/src/helpers/creators/ualbsndimpacts.py
@@ -0,0 +1,107 @@
# Create lookup zip for ualbsndimpacts
# for all future collections
from datetime import datetime, timedelta
from utils.mdx import MDX
import cProfile  # only used by the commented-out profiling hook at the bottom
import time      # only used by the commented-out timing code in main()

short_name = "ualbsndimpacts"
provider_path = "ualbsndimpacts/"
file_type = "ASCII"


class MDXProcessing(MDX):

    def __init__(self):
        super().__init__()

    def process(self, filename, file_obj_stream) -> dict:
        """
        Individual collection processing logic for spatial and temporal
        metadata extraction
        :param filename: name of file to process
        :type filename: str
        :param file_obj_stream: file object stream to be processed
        :type file_obj_stream: botocore.response.StreamingBody
        """
        return self.read_metadata_ascii(filename, file_obj_stream)

    def read_metadata_ascii(self, filename, file_obj_stream):
        """
        Extract temporal and spatial metadata from ASCII files
        """
        print(filename)
        file_lines = []
        for encoded_line in file_obj_stream.iter_lines():
            file_lines.append(encoded_line.decode("ISO-8859-1"))

        num_header_lines = 3
        databuf = file_lines[num_header_lines:]
        minTime, maxTime, minlat, maxlat, minlon, maxlon = [
            datetime(2100, 1, 1), datetime(1900, 1, 1),
            90.0, -90.0, 180.0, -180.0]

        for i in range(len(databuf)):
            tkn = databuf[i].split()
            # tkn[0]: 1/19/2023
            # tkn[1]: 5:40:53
            # tkn[2]: PM
            # tkn[13]: 073°47'57.4"W
            # tkn[14]: 42°41'54.1"N

            dt = tkn[0].split('/')  # i.e., 1, 19, 2023
            tt = tkn[1].split(':')  # i.e., 5, 40, 53
            utc_time_str = ''.join([dt[2], dt[0].zfill(2), dt[1].zfill(2),
                                    'T', tt[0].zfill(2), tt[1].zfill(2),
                                    tt[2].zfill(2)])
            utc_time = datetime.strptime(utc_time_str, '%Y%m%dT%H%M%S')
            # Convert the 12-hour clock to 24-hour time: 12:xx:xx PM stays
            # at hour 12, and 12:xx:xx AM maps to hour 0
            if tkn[2] == 'PM' and utc_time.hour != 12:
                utc_time = utc_time + timedelta(hours=12)
            elif tkn[2] == 'AM' and utc_time.hour == 12:
                utc_time = utc_time - timedelta(hours=12)
            minTime = min(minTime, utc_time)
            maxTime = max(maxTime, utc_time)

            lon_str = tkn[13]  # i.e., '072°47\'35.1"W'
            deg = float(lon_str.split('°')[0])  # i.e., 72
            minu = float(lon_str.split('°')[1].split('\'')[0])  # i.e., 47
            sec = float(lon_str.split('°')[1].split('\'')[1].split('"')[0])  # i.e., 35.1
            lon = (sec/60. + minu)/60. + deg
            if lon_str.endswith('W'):
                lon = -1.*lon

            lat_str = tkn[14]  # i.e., '43°07\'30.8"N'
            deg = float(lat_str.split('°')[0])  # i.e., 43
            minu = float(lat_str.split('°')[1].split('\'')[0])  # i.e., 7
            sec = float(lat_str.split('°')[1].split('\'')[1].split('"')[0])  # i.e., 30.8
            lat = (sec/60. + minu)/60. + deg
            if lat_str.endswith('S'):
                lat = -1.*lat

            maxlat = max(maxlat, lat)
            minlat = min(minlat, lat)
            maxlon = max(maxlon, lon)
            minlon = min(minlon, lon)

        return {
            "start": minTime,
            "end": maxTime,
            "north": maxlat,
            "south": minlat,
            "east": maxlon,
            "west": minlon,
            "format": file_type
        }

    def main(self):
        # start_time = time.time()
        self.process_collection(short_name, provider_path)
        # elapsed_time = time.time() - start_time
        # print(f"Elapsed time in seconds: {elapsed_time}")
        self.shutdown_ec2()


if __name__ == '__main__':
    MDXProcessing().main()
    # The below can be used to run a profiler and see which functions are
    # taking the most time to process
    # cProfile.run('MDXProcessing().main()', sort='tottime')
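
The split chains above implement a standard degrees-minutes-seconds to decimal-degrees conversion. A standalone sketch of the same arithmetic with a worked value follows; parse_dms is an illustrative helper, not part of the module:

def parse_dms(dms: str) -> float:
    """Convert a token like 072°47'35.1"W to decimal degrees,
    negating western longitudes and southern latitudes."""
    deg, rest = dms.split('°')
    minu, rest = rest.split("'")
    sec = rest.split('"')[0]
    value = float(deg) + float(minu) / 60.0 + float(sec) / 3600.0
    return -value if dms.endswith(('W', 'S')) else value


# 72 + 47/60 + 35.1/3600 = 72.793083...; the trailing 'W' makes it negative.
assert abs(parse_dms("072°47'35.1\"W") - (-72.7930833)) < 1e-6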
Binary file not shown.
Binary file not shown.