Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enchancing Metadata Support for SOAR #118

Merged
merged 24 commits into from
Jul 6, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
122e964
detector added
NucleonGodX May 27, 2024
0e3307b
test fixes/minor fixes
NucleonGodX May 28, 2024
70c4c32
added tests for detector and joins / combined join operations in a si…
NucleonGodX May 29, 2024
a7f7102
detector tests added, useful comments added to contruct methods
NucleonGodX May 30, 2024
3d15042
suggestions applied
NucleonGodX May 31, 2024
4748035
cleaned the code, all suggestions applied
NucleonGodX May 31, 2024
f5ac3ec
comment added for join condition
NucleonGodX Jun 1, 2024
8496fed
pre-commit fix
NucleonGodX Jun 1, 2024
f4340d8
code cleanup
NucleonGodX Jun 2, 2024
074bb5c
code cleanup
NucleonGodX Jun 2, 2024
f9a789c
wavelength added with testcases
NucleonGodX Jun 10, 2024
cec907d
updated changelog
NucleonGodX Jun 13, 2024
1873850
test case updated
NucleonGodX Jun 14, 2024
76e1900
Apply suggestions from code review
NucleonGodX Jun 20, 2024
d920659
suggestions applied
NucleonGodX Jun 20, 2024
d1b4510
Merge branch 'main' into gsoc24-soar
NucleonGodX Jun 20, 2024
6fc6921
test cases simplified
NucleonGodX Jun 22, 2024
f9352f1
Merge branch 'main' into gsoc24-soar
NucleonGodX Jun 27, 2024
52366cc
Update sunpy_soar/client.py
NucleonGodX Jun 27, 2024
6b838c3
tests updated
NucleonGodX Jun 27, 2024
183c2d8
phi and it's tests removed
NucleonGodX Jul 4, 2024
645aca5
removed low_latency detector test
NucleonGodX Jul 4, 2024
bd8bc31
code cleanup, h2. prefix removed from attrs file
NucleonGodX Jul 6, 2024
616f96b
Merge branch 'main' into gsoc24-soar
NucleonGodX Jul 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions sunpy_soar/attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,8 @@ def _(wlk, attr, params): # NOQA: ARG001
@walker.add_applier(SOOP)
def _(wlk, attr, params): # NOQA: ARG001
params.append(f"soop_name='{attr.value}'")


@walker.add_applier(a.Detector)
def _(wlk, attr, params): # NOQA: ARG001
params.append(f"Detector='{attr.value}'")
140 changes: 121 additions & 19 deletions sunpy_soar/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ class SOARClient(BaseClient):
Client to access the Solar Orbiter Archive (SOAR).
"""

join_needed = False
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure I like the idea of this being part of the client, can this not be moved else where?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could achieve a solution by making a basic function for it, let me know if its better!


def search(self, *query, **kwargs): # NOQA: ARG002
query = and_(*query)
queries = walker.create(query)
Expand All @@ -35,35 +37,132 @@ def search(self, *query, **kwargs): # NOQA: ARG002
qrt.hide_keys = ["Data item ID", "Filename"]
return qrt

@staticmethod
def construct_join(query, join_needed, data_table, instrument_table):
nabobalis marked this conversation as resolved.
Show resolved Hide resolved
"""
Construct the WHERE, FROM, and SELECT parts of the ADQL query.
nabobalis marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
query : list[str]
List of query items.
join_needed : bool
Flag indicating whether a join is needed.
data_table : str
Name of the data table.
instrument_table : str
Name of the instrument table.

Returns
-------
tuple[str, str, str]
WHERE, FROM, and SELECT parts of the query.
"""
final_query = ""
for parameter in query:
prefix = "h1." if not join_needed or not parameter.startswith("Detector") else "h2."
if parameter.startswith("begin_time"):
time_list = parameter.split("+AND+")
begin_start = f"h1.{time_list[0]}"
begin_end = f"h1.{time_list[1]}"
final_query += f"{begin_start}+AND+{begin_end}+AND+"
hayesla marked this conversation as resolved.
Show resolved Hide resolved
# As there are no dimensions in STIX, the dimension index need not be included in the query for STIX.
if "stx" not in instrument_table:
final_query += "h2.dimension_index='1'+AND+"
nabobalis marked this conversation as resolved.
Show resolved Hide resolved
else:
final_query += f"{prefix}{parameter}+AND+"

where_part = final_query[:-5]
from_part = f"{data_table} AS h1"
select_part = (
"h1.instrument, h1.descriptor, h1.level, h1.begin_time, h1.end_time, "
"h1.data_item_id, h1.filesize, h1.filename, h1.soop_name"
)
if instrument_table:
from_part += f" JOIN {instrument_table} AS h2 USING (data_item_oid)"
select_part += ", h2.detector, h2.dimension_index"
return where_part, from_part, select_part

@staticmethod
def get_instrument_table(query_item):
"""
Get the instrument table name from the query item.

For different instruments the table names are:

- SOLOHI: v_shi_sc_fits
- EUI: v_eui_sc_fits
- STIX: v_stx_sc_fits
- SPICE: v_spi_sc_fits
- PHI: v_phi_sc_fits
- METIS: v_met_sc_fits

Parameters
----------
query_item : str
Query item containing the instrument name.

Returns
-------
str
Instrument table name.
"""
instrument_name = query_item.split("=")[1][1:-1].split("-")[0].upper()
if instrument_name == "STIX":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a dict that maps the full name to the "table" name would be "cleaner" python code.

instrument_name = "STX"
elif instrument_name == "SOLOHI":
instrument_name = "SHI"
else:
# For all other remote-sensing instruments, the table alias is derived from
# the first three letters of the instrument name.
instrument_name = instrument_name[:3]
return f"v_{instrument_name.lower()}_sc_fits", instrument_name

@staticmethod
def _construct_payload(query):
"""
Construct search payload.

Parameters
----------
payload : dict[str]
Payload to send to the TAP server.
query : list[str]
List of query items.

Returns
-------
dict
Payload dictionary to be sent with the query.
"""
# Construct ADQL query
url_query = {}
url_query["SELECT"] = "*"
# Assume science data by default
url_query["FROM"] = "v_sc_data_item"
# Default data table
data_table = "v_sc_data_item"
nabobalis marked this conversation as resolved.
Show resolved Hide resolved
instrument_table = None
instrument_name = ""
instrument_found = False
for q in query:
if q.startswith("level") and q.split("=")[1][1:3] == "LL":
# Low latency data
url_query["FROM"] = "v_ll_data_item"
if q.startswith("instrument"):
instrument_table, instrument_name = SOARClient.get_instrument_table(q)
nabobalis marked this conversation as resolved.
Show resolved Hide resolved
instrument_found = True
elif q.startswith("descriptor") and not instrument_found:
instrument_table, instrument_name = SOARClient.get_instrument_table(q)
elif q.startswith("level") and q.split("=")[1][1:3] == "LL":
data_table = "v_ll_data_item"
if instrument_table:
instrument_table = instrument_table.replace("_sc_", "_ll_")
# The table aliases employed in the instrument-specific tables are as follows.
if instrument_name in ["EUI", "STX", "MET", "SPI", "PHI", "SHI"]:
SOARClient.join_needed = True
where_part, from_part, select_part = SOARClient.construct_join(
query, SOARClient.join_needed, data_table, instrument_table
)
else:
from_part = data_table
select_part = "*"
where_part = "+AND+".join(query)

url_query["WHERE"] = "+AND+".join(query)
adql_query = "+".join([f"{item}+{url_query[item]}" for item in url_query])
adql_query = {"SELECT": select_part, "FROM": from_part, "WHERE": where_part}

return {
"REQUEST": "doQuery",
"LANG": "ADQL",
"FORMAT": "json",
"QUERY": adql_query,
}
adql_query_str = "+".join([f"{key}+{value}" for key, value in adql_query.items()])
hayesla marked this conversation as resolved.
Show resolved Hide resolved
return {"REQUEST": "doQuery", "LANG": "ADQL", "FORMAT": "json", "QUERY": adql_query_str}

@staticmethod
def _do_search(query):
Expand Down Expand Up @@ -92,6 +191,7 @@ def _do_search(query):
# Do some list/dict wrangling
names = [m["name"] for m in r.json()["metadata"]]
info = {name: [] for name in names}

for entry in r.json()["data"]:
for i, name in enumerate(names):
info[name].append(entry[i])
Expand All @@ -113,6 +213,8 @@ def _do_search(query):
"SOOP Name": info["soop_name"],
},
)
if SOARClient.join_needed and "detector" in info:
nabobalis marked this conversation as resolved.
Show resolved Hide resolved
result_table["Detector"] = info["detector"]
result_table.sort("Start time")
return result_table

Expand Down Expand Up @@ -160,7 +262,7 @@ def _can_handle_query(cls, *query):
True if this client can handle the given query.
"""
required = {a.Time}
optional = {a.Instrument, a.Level, a.Provider, Product, SOOP}
optional = {a.Instrument, a.Detector, a.Level, a.Provider, Product, SOOP}
if not cls.check_attr_types_in_query(query, required, optional):
return False
# check to make sure the instrument attr passed is one provided by the SOAR.
Expand Down
70 changes: 70 additions & 0 deletions sunpy_soar/tests/test_sunpy_soar.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,73 @@ def test_when_wrong_provider_passed():
provider = a.Provider.noaa
res = Fido.search(time & instrument & provider)
assert len(res) == 0


def test_search_detector_Instrument_dimension_0():
nabobalis marked this conversation as resolved.
Show resolved Hide resolved
NucleonGodX marked this conversation as resolved.
Show resolved Hide resolved
# As STIX data has zero dimensions, there is no detector information available in the SOAR data.
nabobalis marked this conversation as resolved.
Show resolved Hide resolved
instrument = a.Instrument("STIX")
time = a.Time("2023-03-03 00:00", "2023-03-03 23:59")
hayesla marked this conversation as resolved.
Show resolved Hide resolved
level = a.Level(1)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also a test for LL data?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually there is test_search_low_latency() (for MAG data; executes the query), and the new test_join_low_latency_query() (for EUI; only tests the query string, without executing it). So maybe this is enough?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, will reverse this change!

res = Fido.search(instrument & time & level)
assert "Detector" in res[0].columns
assert res.file_num == 35
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Testing the number of results is maybe not very robust, this number might change in SOAR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just followed how tests are already written in test cases. But I do understand your point!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can check for at least 35 files?



def test_search_detector_Instrument_dimension_2():
NucleonGodX marked this conversation as resolved.
Show resolved Hide resolved
instrument = a.Instrument("EUI")
time = a.Time("2020-03-03 15:00", "2020-03-03 16:00")
level = a.Level(1)
detector = a.Detector("HRI_EUV")
res = Fido.search(instrument & time & level & detector)
assert "Detector" in res[0].columns
assert res.file_num == 20


def test_search_detector_Instrument_dimension_4():
NucleonGodX marked this conversation as resolved.
Show resolved Hide resolved
instrument = a.Instrument("SPICE")
time = a.Time("2023-03-03 15:00", "2023-03-03 16:00")
level = a.Level(1)
detector = a.Detector("SW")
res = Fido.search(instrument & time & level & detector)
assert "Detector" in res[0].columns
assert res.file_num == 11


def test_join_science_query():
result = SOARClient._construct_payload( # NOQA: SLF001
[
"instrument='EUI'",
"begin_time>='2021-02-01+00:00:00'+AND+begin_time<='2021-02-02+00:00:00'",
"level='L1'",
"descriptor='eui-fsi174-image'",
]
)

# Formatted assert statement
assert result["QUERY"] == (
"SELECT+h1.instrument, h1.descriptor, h1.level, h1.begin_time, h1.end_time, "
"h1.data_item_id, h1.filesize, h1.filename, h1.soop_name, h2.detector, "
"h2.dimension_index+FROM+v_sc_data_item AS h1 JOIN v_eui_sc_fits AS h2 USING (data_item_oid)"
"+WHERE+h1.instrument='EUI'+AND+h1.begin_time>='2021-02-01+00:00:00'+AND+h1.begin_time<='2021-02-02+00:00:00'"
"+AND+h2.dimension_index='1'+AND+h1.level='L1'+AND+h1.descriptor='eui-fsi174-image'"
)


def test_join_low_latency_query():
result = SOARClient._construct_payload( # NOQA: SLF001
[
"instrument='EUI'",
"begin_time>='2021-02-01+00:00:00'+AND+begin_time<='2021-02-02+00:00:00'",
"level='LL01'",
"descriptor='eui-fsi174-image'",
]
)

# Formatted assert statement
assert result["QUERY"] == (
"SELECT+h1.instrument, h1.descriptor, h1.level, h1.begin_time, h1.end_time, "
"h1.data_item_id, h1.filesize, h1.filename, h1.soop_name, h2.detector, "
"h2.dimension_index+FROM+v_ll_data_item AS h1 JOIN v_eui_ll_fits AS h2 USING (data_item_oid)"
"+WHERE+h1.instrument='EUI'+AND+h1.begin_time>='2021-02-01+00:00:00'+AND+h1.begin_time<='2021-02-02+00:00:00'"
"+AND+h2.dimension_index='1'+AND+h1.level='LL01'+AND+h1.descriptor='eui-fsi174-image'"
)