Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/issue 248 - Track ingest operations need to query UAT for granule files #249

Merged
merged 5 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Removed
### Fixed
- Issue 235 - Track ingest table can be populated with granules that aren't loaded into Hydrocron
- Issue 248 - Track ingest operations need to query UAT for granule files if track ingest is running in SIT or UAT
### Security

## [1.4.1]
Expand Down
16 changes: 10 additions & 6 deletions hydrocron/db/track_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,31 +109,27 @@ def query_cmr(self, temporal):
bearer_token = self._get_bearer_token()
granules = query.short_name(self.SHORTNAME[self.collection_shortname]) \
.temporal(self.query_start, self.query_end) \
.format("umm_json") \
.mode(CMR_UAT) \
.bearer_token(bearer_token) \
.get(query.hits())
granules = self._filter_granules(granules)
else:
granules = query.short_name(self.collection_shortname) \
.temporal(self.query_start, self.query_end) \
.format("umm_json") \
.get(query.hits())
else:
logging.info("Querying CMR revision_date range: %s to %s.", self.query_start, self.query_end)
if self.ENV in ("sit", "uat"):
bearer_token = self._get_bearer_token()
granules = query.short_name(self.SHORTNAME[self.collection_shortname]) \
.revision_date(self.query_start, self.query_end) \
.format("umm_json") \
.mode(CMR_UAT) \
.bearer_token(bearer_token) \
.get(query.hits())
granules = self._filter_granules(granules)
else:
granules = query.short_name(self.collection_shortname) \
.revision_date(self.query_start, self.query_end) \
.format("umm_json") \
.get(query.hits())

cmr_granules = {}
Expand Down Expand Up @@ -295,7 +291,15 @@ def _query_granule_files(self, granule_ur):
"""

query = GranuleQuery()
granules = query.short_name(self.collection_shortname).readable_granule_name(granule_ur).format("umm_json").get_all()
query = query.short_name(self.collection_shortname).readable_granule_name(granule_ur).format("umm_json")

if self.ENV in ("sit", "uat"):
bearer_token = self._get_bearer_token()
query = query.bearer_token(bearer_token) \
.mode(CMR_UAT) \
.short_name(self.SHORTNAME[self.collection_shortname])

granules = query.get_all()
cnm_files = []
for granule in granules:
granule_json = json.loads(granule)
Expand All @@ -313,7 +317,7 @@ def _query_granule_files(self, granule_ur):
for granule_url in granule_item["umm"]["RelatedUrls"]:
if granule_url["Type"] == "GET DATA VIA DIRECT ACCESS":
if self.ENV in ("sit", "uat"):
cnm_file["uri"] = granule_url["URL"].replace("ops", "uat")
cnm_file["uri"] = granule_url["URL"].replace("sit", "uat")
else:
cnm_file["uri"] = granule_url["URL"]
cnm_files.append(cnm_file)
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def mock_sns():
os.environ["AWS_DEFAULT_REGION"] = "us-west-2"

sns = boto3.client("sns")
sns.create_topic(Name="svc-hydrocron-sit-cnm-response")
sns.create_topic(Name="svc-hydrocron-test-cnm-response")

yield sns

Expand Down
2 changes: 1 addition & 1 deletion tests/test_data/track_ingest_cnm_message.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"checksumType": "md5",
"checksum": "6ce27e868bd90055252de186f554759f",
"size": 745878,
"uri": "s3://podaac-swot-uat-cumulus-protected/SWOT_L2_HR_RiverSP_2.0/SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip"
"uri": "s3://podaac-swot-ops-cumulus-protected/SWOT_L2_HR_RiverSP_2.0/SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip"
}
]
}
Expand Down
8 changes: 4 additions & 4 deletions tests/test_track_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ def test_query_cmr(mock_ssm):

Uses vcrpy to record CMR API response.
"""
os.environ['HYDROCRON_ENV'] = 'OPS'
from hydrocron.db.track_ingest import Track

collection_shortname = "SWOT_L2_HR_RiverSP_reach_2.0"
collection_start_date = datetime.datetime.strptime("20240630", "%Y%m%d").replace(tzinfo=datetime.timezone.utc)
track = Track(collection_shortname, collection_start_date)
track.query_start = datetime.datetime(2024, 6, 30, 0, 0, 0, tzinfo=datetime.timezone.utc)
track.query_end = datetime.datetime(2024, 6, 30, 12, 0, 0, tzinfo=datetime.timezone.utc)
track.ENV = "OPS"

vcr_cassette = pathlib.Path(os.path.dirname(os.path.realpath(__file__))) \
.joinpath('vcr_cassettes').joinpath('cmr_query.yaml')
Expand Down Expand Up @@ -302,15 +302,15 @@ def test_track_ingest_publish_cnm(track_ingest_cnm_fixture):
"actual_feature_count": 0,
"status": "to_ingest"
}]
track.ENV = "sit"
track.ENV = "test"

vcr_cassette = pathlib.Path(os.path.dirname(os.path.realpath(__file__))) \
.joinpath('vcr_cassettes').joinpath('publish_cnm.yaml')
with vcr.use_cassette(vcr_cassette, decode_compressed_response=True):
track.publish_cnm_ingest(DEFAULT_ACCOUNT_ID)

sns_backend = sns_backends[DEFAULT_ACCOUNT_ID]["us-west-2"]
actual = json.loads(sns_backend.topics[f"arn:aws:sns:us-west-2:{DEFAULT_ACCOUNT_ID}:svc-hydrocron-sit-cnm-response"].sent_notifications[0][1])
actual = json.loads(sns_backend.topics[f"arn:aws:sns:us-west-2:{DEFAULT_ACCOUNT_ID}:svc-hydrocron-test-cnm-response"].sent_notifications[0][1])

expected_file = (pathlib.Path(os.path.dirname(os.path.realpath(__file__)))
.joinpath('test_data').joinpath('track_ingest_cnm_message.json'))
Expand Down
42 changes: 21 additions & 21 deletions tests/vcr_cassettes/publish_cnm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ interactions:
uri: https://cmr.earthdata.nasa.gov/search/granules.umm_json?short_name=SWOT_L2_HR_RiverSP_reach_2.0&readable_granule_name%5B%5D=SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip&options%5Breadable_granule_name%5D%5Bpattern%5D=true&page_size=0
response:
body:
string: '{"hits":1,"took":105,"items":[]}'
string: '{"hits":1,"took":95,"items":[]}'
headers:
Access-Control-Allow-Origin:
- '*'
Expand All @@ -24,19 +24,19 @@ interactions:
CMR-Hits:
- '1'
CMR-Request-Id:
- e3986cbe-e92f-4f66-9d3a-135846d23a3c
- 9f40c39a-12cc-4033-b67a-cb8fee448bc5
CMR-Took:
- '106'
- '95'
Connection:
- keep-alive
Content-MD5:
- efb24cae14e6d340937153b1c3fcd8e7
- 461384cf862923b25d050408730eacc0
Content-SHA1:
- 5f45a4e901063718301c91a36cdd34550fb0ca50
- c6ee7948f5b33a1d02a79d1cd64cecafb8a78d11
Content-Type:
- application/vnd.nasa.cmr.umm_results+json;version=1.6.6; charset=utf-8
Date:
- Thu, 10 Oct 2024 21:05:46 GMT
- Fri, 11 Oct 2024 20:42:19 GMT
Server:
- ServerTokens ProductOnly
Strict-Transport-Security:
Expand All @@ -46,23 +46,23 @@ interactions:
Vary:
- Accept-Encoding, User-Agent
Via:
- 1.1 27b5e48b551f58a537d4366486948250.cloudfront.net (CloudFront)
- 1.1 c4e50e26fcbefa29d8d1be685a1f4242.cloudfront.net (CloudFront)
X-Amz-Cf-Id:
- -dGCq9LYLoXF9oC5tqf6ZHugYJRfeqrk1uQw1IIpqRc8mTdbvzKVkA==
- OGrkgQzrwmJ_wu5tuaqCMCdg3rdW0DUZHyOoAZG4qphS18EuJmrEcA==
X-Amz-Cf-Pop:
- LAX54-P3
- LAX50-C1
X-Cache:
- Miss from cloudfront
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- SAMEORIGIN
X-Request-Id:
- -dGCq9LYLoXF9oC5tqf6ZHugYJRfeqrk1uQw1IIpqRc8mTdbvzKVkA==
- OGrkgQzrwmJ_wu5tuaqCMCdg3rdW0DUZHyOoAZG4qphS18EuJmrEcA==
X-XSS-Protection:
- 1; mode=block
content-length:
- '32'
- '31'
status:
code: 200
message: OK
Expand All @@ -81,7 +81,7 @@ interactions:
uri: https://cmr.earthdata.nasa.gov/search/granules.umm_json?short_name=SWOT_L2_HR_RiverSP_reach_2.0&readable_granule_name%5B%5D=SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip&options%5Breadable_granule_name%5D%5Bpattern%5D=true&page_size=1
response:
body:
string: '{"hits":1,"took":74,"items":[{"meta":{"concept-type":"granule","concept-id":"G3232820892-POCLOUD","revision-id":1,"native-id":"SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01_swot","collection-concept-id":"C2799438303-POCLOUD","provider-id":"POCLOUD","format":"application/vnd.nasa.cmr.umm+json","revision-date":"2024-09-09T02:57:10.684Z"},"umm":{"TemporalExtent":{"RangeDateTime":{"EndingDateTime":"2024-09-05T23:31:35.589Z","BeginningDateTime":"2024-09-05T23:31:34.815Z"}},"GranuleUR":"SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01_swot","AdditionalAttributes":[{"Values":["246L","247L","248L","249L","250L","251L","252L","253L","254L","255L","256L","257L","258L","246R","247R","248R","249R","250R","251R","252R","253R","254R","255R","256R","257R","258R"],"Name":"TILE"}],"MeasuredParameters":[{"QAStats":{"QAPercentMissingData":0,"QAPercentOutOfBoundsData":0},"ParameterName":"N/A"}],"SpatialExtent":{"HorizontalSpatialDomain":{"Geometry":{"BoundingRectangles":[{"WestBoundingCoordinate":-130.8307934819651,"SouthBoundingCoordinate":52.93360004865338,"EastBoundingCoordinate":-125.58639150138737,"NorthBoundingCoordinate":58.82204893062929}]},"Track":{"Cycle":20,"Passes":[{"Pass":457,"Tiles":["246L","247L","248L","249L","250L","251L","252L","253L","254L","255L","256L","257L","258L","246R","247R","248R","249R","250R","251R","252R","253R","254R","255R","256R","257R","258R"]}]}}},"ProviderDates":[{"Type":"Insert","Date":"2024-09-09T01:25:25.739Z"},{"Type":"Update","Date":"2024-09-09T01:25:25.739Z"}],"CollectionReference":{"Version":"2.0","ShortName":"SWOT_L2_HR_RiverSP_reach_2.0"},"PGEVersionClass":{"PGEName":"PGE_L2_HR_RiverSP","PGEVersion":"5.0.4"},"RelatedUrls":[{"URL":"https://archive.swot.podaac.earthdata.nasa.gov/podaac-swot-ops-cumulus-public/SWOT_L2_HR_RiverSP_2.0/SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip.iso.xml","Description":"Download
string: '{"hits":1,"took":92,"items":[{"meta":{"concept-type":"granule","concept-id":"G3232820892-POCLOUD","revision-id":1,"native-id":"SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01_swot","collection-concept-id":"C2799438303-POCLOUD","provider-id":"POCLOUD","format":"application/vnd.nasa.cmr.umm+json","revision-date":"2024-09-09T02:57:10.684Z"},"umm":{"TemporalExtent":{"RangeDateTime":{"EndingDateTime":"2024-09-05T23:31:35.589Z","BeginningDateTime":"2024-09-05T23:31:34.815Z"}},"GranuleUR":"SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01_swot","AdditionalAttributes":[{"Values":["246L","247L","248L","249L","250L","251L","252L","253L","254L","255L","256L","257L","258L","246R","247R","248R","249R","250R","251R","252R","253R","254R","255R","256R","257R","258R"],"Name":"TILE"}],"MeasuredParameters":[{"QAStats":{"QAPercentMissingData":0,"QAPercentOutOfBoundsData":0},"ParameterName":"N/A"}],"SpatialExtent":{"HorizontalSpatialDomain":{"Geometry":{"BoundingRectangles":[{"WestBoundingCoordinate":-130.8307934819651,"SouthBoundingCoordinate":52.93360004865338,"EastBoundingCoordinate":-125.58639150138737,"NorthBoundingCoordinate":58.82204893062929}]},"Track":{"Cycle":20,"Passes":[{"Pass":457,"Tiles":["246L","247L","248L","249L","250L","251L","252L","253L","254L","255L","256L","257L","258L","246R","247R","248R","249R","250R","251R","252R","253R","254R","255R","256R","257R","258R"]}]}}},"ProviderDates":[{"Type":"Insert","Date":"2024-09-09T01:25:25.739Z"},{"Type":"Update","Date":"2024-09-09T01:25:25.739Z"}],"CollectionReference":{"Version":"2.0","ShortName":"SWOT_L2_HR_RiverSP_reach_2.0"},"PGEVersionClass":{"PGEName":"PGE_L2_HR_RiverSP","PGEVersion":"5.0.4"},"RelatedUrls":[{"URL":"https://archive.swot.podaac.earthdata.nasa.gov/podaac-swot-ops-cumulus-public/SWOT_L2_HR_RiverSP_2.0/SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip.iso.xml","Description":"Download
SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip.iso.xml","Type":"EXTENDED
METADATA"},{"URL":"s3://podaac-swot-ops-cumulus-public/SWOT_L2_HR_RiverSP_2.0/SWOT_L2_HR_RiverSP_Reach_020_457_NA_20240905T233134_20240905T233135_PIC0_01.zip.iso.xml","Description":"This
link provides direct download access via S3 to the granule","Type":"EXTENDED
Expand Down Expand Up @@ -114,21 +114,21 @@ interactions:
CMR-Hits:
- '1'
CMR-Request-Id:
- 21ec8088-e6d8-4bbe-b659-fd9a86915dc8
- f3bd451b-250e-464f-aca3-261d20a36ae4
CMR-Search-After:
- '["pocloud",1725579094815,3232820892]'
CMR-Took:
- '75'
- '93'
Connection:
- keep-alive
Content-MD5:
- 77046ee32e3289cf25ac89b8a8f2e431
- 544062937074509d57e9507aa95f44a3
Content-SHA1:
- dab6e5694fc519744720a256a21ea1c522f2218f
- 4df473f06f2f0b28ee0e328160064de80869bf4f
Content-Type:
- application/vnd.nasa.cmr.umm_results+json;version=1.6.6; charset=utf-8
Date:
- Thu, 10 Oct 2024 21:05:46 GMT
- Fri, 11 Oct 2024 20:42:20 GMT
Server:
- ServerTokens ProductOnly
Strict-Transport-Security:
Expand All @@ -138,19 +138,19 @@ interactions:
Vary:
- Accept-Encoding, User-Agent
Via:
- 1.1 e4cf17e75928ca088d52aecb26f1f1da.cloudfront.net (CloudFront)
- 1.1 92dd5512d5f290fe351674f3051d6d82.cloudfront.net (CloudFront)
X-Amz-Cf-Id:
- YohmjCui14QWvPVi2hcsgadZt5eKbVh6WQscK946z7WnDuV74O2BRg==
- n2l1zw3GXkohQmQItnRDXs5Rh3HPLrSzLvWEeWDpaCjBvUH5Dm7_fw==
X-Amz-Cf-Pop:
- LAX54-P3
- LAX50-C1
X-Cache:
- Miss from cloudfront
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- SAMEORIGIN
X-Request-Id:
- YohmjCui14QWvPVi2hcsgadZt5eKbVh6WQscK946z7WnDuV74O2BRg==
- n2l1zw3GXkohQmQItnRDXs5Rh3HPLrSzLvWEeWDpaCjBvUH5Dm7_fw==
X-XSS-Protection:
- 1; mode=block
content-length:
Expand Down