From 180727a89ef08855f3ff4b68846e42dabe2fcdb0 Mon Sep 17 00:00:00 2001
From: Alberto Islas
Date: Fri, 18 Oct 2024 15:39:02 -0600
Subject: [PATCH] fix(alerts): Introduced SendAlertsResponse and PercolatorResponses

---
 cl/alerts/tasks.py | 16 +--
 cl/alerts/tests/tests.py | 8 +-
 cl/alerts/tests/tests_recap_alerts.py | 87 ++++++++------
 cl/alerts/utils.py | 132 ++++++++++-----------
 cl/search/tests/tests_es_oral_arguments.py | 28 ++---
 cl/search/types.py | 11 +-
 6 files changed, 155 insertions(+), 127 deletions(-)

diff --git a/cl/alerts/tasks.py b/cl/alerts/tasks.py
index 795efc1cb2..ff0f31e52e 100644
--- a/cl/alerts/tasks.py
+++ b/cl/alerts/tasks.py
@@ -51,11 +51,11 @@ from cl.search.models import Docket, DocketEntry
 from cl.search.types import (
     ESDocumentNameType,
-    PercolatorResponsesType,
     PercolatorResponseType,
     SaveDocumentResponseType,
     SaveESDocumentReturnType,
     SearchAlertHitType,
+    SendAlertsResponse,
 )
 from cl.stats.utils import tally_stat
 from cl.users.models import UserProfile
@@ -643,7 +643,7 @@ def process_percolator_response(response: PercolatorResponseType) -> None:
 
 
 @app.task(ignore_result=True)
-def percolator_response_processing(response: PercolatorResponsesType) -> None:
+def percolator_response_processing(response: SendAlertsResponse) -> None:
     """Process the response from the percolator and handle alerts triggered
     by the percolator query.
 
@@ -876,7 +876,7 @@ def send_or_schedule_alerts(
 )
 def send_or_schedule_search_alerts(
     self: Task, response: SaveESDocumentReturnType | None
-) -> PercolatorResponsesType | None:
+) -> SendAlertsResponse | None:
     """Send real-time alerts based on the Elasticsearch search response. Or
     schedule other rates alerts to send them later.
 
@@ -891,8 +891,10 @@ def send_or_schedule_search_alerts(
     :param self: The celery task
     :param response: A two tuple, the document ID to be percolated in ES index
     and the document data that triggered the alert.
-    :return: A two tuple, a list of Alerts triggered and the document data that
-    triggered the alert.
+    :return: A SendAlertsResponse dataclass containing the main alerts
+    triggered, the recap documents alerts triggered, the docket alerts
+    triggered, the document content that triggered the alert, and the related
+    app label model.
     """
 
     if not response or not settings.PERCOLATOR_SEARCH_ALERTS_ENABLED:
@@ -916,7 +918,7 @@ def send_or_schedule_search_alerts(
         documents_to_percolate,
         app_label,
     )
-    if not percolator_responses[0]:
+    if not percolator_responses.main_response:
         self.request.chain = None
         return None
 
@@ -936,7 +938,7 @@ def send_or_schedule_search_alerts(
             )
         )
 
-    return PercolatorResponsesType(
+    return SendAlertsResponse(
         main_alerts_triggered=main_alerts_triggered,
         rd_alerts_triggered=rd_alerts_triggered,
         d_alerts_triggered=d_alerts_triggered,
diff --git a/cl/alerts/tests/tests.py b/cl/alerts/tests/tests.py
index 633b1d030b..dcf3905155 100644
--- a/cl/alerts/tests/tests.py
+++ b/cl/alerts/tests/tests.py
@@ -2734,7 +2734,9 @@ def test_percolate_document_in_batches(self, mock_abort_audio):
             AudioPercolator._index._name,
             document_index,
         )
-        ids_in_results = [result.id for result in percolator_responses[0].hits]
+        ids_in_results = [
+            result.id for result in percolator_responses.main_response.hits
+        ]
 
         # Update the first in the previous batch.
         alert_to_modify = alerts_created[0]
         alert_to_modify.save()
 
         # Percolate the next page.
-        search_after = percolator_responses[0].hits[-1].meta.sort
+        search_after = percolator_responses.main_response.hits[-1].meta.sort
         percolator_responses = percolate_es_document(
             str(rt_oral_argument.pk),
             AudioPercolator._index._name,
@@ -2752,7 +2754,7 @@
         # The document updated shouldn't be retrieved again.
         # Since documents are ordered by asc date_created instead of timestamp.
-        for result in percolator_responses[0].hits:
+        for result in percolator_responses.main_response.hits:
             self.assertNotIn(result.id, ids_in_results)
             ids_in_results.append(result.id)
diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py
index a5e546361e..8e2d4b10e1 100644
--- a/cl/alerts/tests/tests_recap_alerts.py
+++ b/cl/alerts/tests/tests_recap_alerts.py
@@ -1786,9 +1786,9 @@ def test_recap_document_cross_object_percolator_queries(
             app_label, str(self.rd_att.pk)
         )
         expected_queries = 1
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id), True
+            self.confirm_query_matched(responses.main_response, query_id), True
         )
 
         # Test Percolate a RECAPDocument. It should match the query containing
@@ -1805,9 +1805,10 @@ def test_recap_document_cross_object_percolator_queries(
             app_label, str(self.rd.pk)
         )
         expected_queries = 1
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id_1), True
+            self.confirm_query_matched(responses.main_response, query_id_1),
+            True,
         )
 
         # Test Percolate a RECAPDocument. It should match the query containing
@@ -1823,12 +1824,14 @@ def test_recap_document_cross_object_percolator_queries(
             app_label, str(self.rd.pk)
         )
         expected_queries = 2
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id_1), True
+            self.confirm_query_matched(responses.main_response, query_id_1),
+            True,
         )
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id_2), True
+            self.confirm_query_matched(responses.main_response, query_id_2),
+            True,
         )
 
         # Test Percolate a RECAPDocument. It should match the query containing
@@ -1845,18 +1848,21 @@ def test_recap_document_cross_object_percolator_queries(
         )
         expected_queries = 3
         self.assertEqual(
-            len(responses[0]),
+            len(responses.main_response),
             expected_queries,
             msg="Wrong number of queries matched.",
         )
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id_1), True
+            self.confirm_query_matched(responses.main_response, query_id_1),
+            True,
         )
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id_2), True
+            self.confirm_query_matched(responses.main_response, query_id_2),
+            True,
         )
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id_3), True
+            self.confirm_query_matched(responses.main_response, query_id_3),
+            True,
         )
 
     def test_recap_document_percolator(self, mock_prefix) -> None:
@@ -1881,12 +1887,12 @@ def test_recap_document_percolator(self, mock_prefix) -> None:
         )
         expected_queries = 1
         self.assertEqual(
-            len(responses[0]),
+            len(responses.main_response),
             expected_queries,
             msg="Matched queries didn't match.",
         )
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id), True
+            self.confirm_query_matched(responses.main_response, query_id), True
         )
 
         # Test percolate only filters combination.
@@ -1904,12 +1910,12 @@ def test_recap_document_percolator(self, mock_prefix) -> None:
         )
         expected_queries = 1
         self.assertEqual(
-            len(responses[0]),
+            len(responses.main_response),
            expected_queries,
             msg="Matched queries didn't match.",
         )
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id), True
+            self.confirm_query_matched(responses.main_response, query_id), True
         )
 
         # Test percolate a different document targeting a different filters
@@ -1927,12 +1933,12 @@ def test_recap_document_percolator(self, mock_prefix) -> None:
         )
         expected_queries = 1
         self.assertEqual(
-            len(responses[0]),
+            len(responses.main_response),
             expected_queries,
             msg="Matched queries didn't match.",
         )
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id), True
+            self.confirm_query_matched(responses.main_response, query_id), True
         )
 
         # Test percolate the same document loosen the query.
@@ -1948,15 +1954,16 @@ def test_recap_document_percolator(self, mock_prefix) -> None:
         )
         expected_queries = 2
         self.assertEqual(
-            len(responses[0]),
+            len(responses.main_response),
             expected_queries,
             msg="Matched queries didn't match.",
         )
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id), True
+            self.confirm_query_matched(responses.main_response, query_id), True
         )
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id_2), True
+            self.confirm_query_matched(responses.main_response, query_id_2),
+            True,
         )
 
     def test_docket_percolator(self, mock_prefix) -> None:
@@ -1981,7 +1988,7 @@ def test_docket_percolator(self, mock_prefix) -> None:
             document_index_alias,
         )
         expected_queries = 0
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
 
         # Test Percolate a docket object. It shouldn't match the query
         # containing text query terms contained only in a RD.
@@ -1998,7 +2005,7 @@ def test_docket_percolator(self, mock_prefix) -> None:
             document_index_alias,
         )
         expected_queries = 0
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
 
         # Test Percolate a docket object. Combining docket terms OR RECAPDocument
         # fields. This query can be triggered only by the Docket document.
@@ -2015,9 +2022,12 @@ def test_docket_percolator(self, mock_prefix) -> None:
             document_index_alias,
         )
         expected_queries = 1
-        self.assertEqual(len(responses[0]), expected_queries, msg="error 1")
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id_2), True
+            len(responses.main_response), expected_queries, msg="error 1"
+        )
+        self.assertEqual(
+            self.confirm_query_matched(responses.main_response, query_id_2),
+            True,
         )
 
         # Test percolate text query + different filters.
@@ -2037,12 +2047,14 @@ def test_docket_percolator(self, mock_prefix) -> None:
             document_index_alias,
         )
         expected_queries = 2
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id_2), True
+            self.confirm_query_matched(responses.main_response, query_id_2),
+            True,
         )
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id_3), True
+            self.confirm_query_matched(responses.main_response, query_id_3),
+            True,
         )
 
         # Test percolate text query + case_name filter.
@@ -2060,9 +2072,10 @@ def test_docket_percolator(self, mock_prefix) -> None:
             document_index_alias,
         )
         expected_queries = 1
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id_4), True
+            self.confirm_query_matched(responses.main_response, query_id_4),
+            True,
         )
 
         # Test percolate one filter.
@@ -2079,9 +2092,10 @@ def test_docket_percolator(self, mock_prefix) -> None:
             document_index_alias,
         )
         expected_queries = 1
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id_5), True
+            self.confirm_query_matched(responses.main_response, query_id_5),
+            True,
         )
 
         # Test percolate text query.
@@ -2098,15 +2112,18 @@ def test_docket_percolator(self, mock_prefix) -> None:
             document_index_alias,
         )
         expected_queries = 3
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id_2), True
+            self.confirm_query_matched(responses.main_response, query_id_2),
+            True,
         )
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id_3), True
+            self.confirm_query_matched(responses.main_response, query_id_3),
+            True,
         )
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id_6), True
+            self.confirm_query_matched(responses.main_response, query_id_6),
+            True,
         )
 
     def test_index_and_delete_recap_alerts_from_percolator(
diff --git a/cl/alerts/utils.py b/cl/alerts/utils.py
index b0fa83d2ee..2b24e65134 100644
--- a/cl/alerts/utils.py
+++ b/cl/alerts/utils.py
@@ -45,7 +45,11 @@
     RECAPPercolator,
 )
 from cl.search.models import SEARCH_TYPES, Docket
-from cl.search.types import ESDictDocument, ESModelClassType
+from cl.search.types import (
+    ESDictDocument,
+    ESModelClassType,
+    PercolatorResponses,
+)
 
 
 @dataclass
@@ -155,7 +159,7 @@ def percolate_es_document(
     main_search_after: int | None = None,
     rd_search_after: int | None = None,
     d_search_after: int | None = None,
-) -> tuple[Response, Response | None, Response | None]:
+) -> PercolatorResponses:
     """Percolate a document against a defined Elasticsearch Percolator query.
 
     :param document_id: The document ID in ES index to be percolated.
@@ -172,9 +176,9 @@ def percolate_es_document(
     search_after param for deep pagination.
     :param d_search_after: Optional the ES Docket document percolator query
     search_after param for deep pagination.
-    :return: A three-tuple containing the main percolator response, the
-    RECAPDocument percolator response (if applicable), and the Docket
-    percolator response (if applicable).
+    :return: A PercolatorResponses dataclass containing the main percolator
+    response, the RECAPDocument percolator response (if applicable), and the
+    Docket percolator response (if applicable).
     """
 
     if document_index:
@@ -271,17 +275,22 @@ def percolate_es_document(
         rd_response = responses[1]
     if s_d:
         d_response = responses[2]
-    return main_response, rd_response, d_response
+    return PercolatorResponses(
+        main_response=main_response,
+        rd_response=rd_response,
+        d_response=d_response,
+    )
 
 
 def fetch_all_search_alerts_results(
-    initial_responses: tuple[Response, Response | None, Response | None], *args
+    initial_responses: PercolatorResponses, *args
 ) -> tuple[list[Hit], list[Hit], list[Hit]]:
     """Fetches all search alerts results based on a given percolator query
     and the initial responses. It retrieves all the search results that
     exceed the initial batch size by iteratively calling percolate_es_document
     method with the necessary pagination parameters.
-    :param initial_responses: The initial ES Responses tuple.
+    :param initial_responses: A PercolatorResponses dataclass containing the
+    initial ES Percolator Responses.
     :param args: Additional arguments to pass to the percolate_es_document
     method.
     :return: A three-tuple containing the main percolator results, the
     RECAPDocument percolator results (if applicable), and the Docket
@@ -292,62 +301,60 @@ def fetch_all_search_alerts_results(
     all_rd_alert_hits = []
     all_d_alert_hits = []
 
-    main_response = initial_responses[0]
+    main_response = initial_responses.main_response
     all_main_alert_hits.extend(main_response.hits)
     main_total_hits = main_response.hits.total.value
     main_alerts_returned = len(main_response.hits.hits)
 
     rd_response = d_response = None
-    if initial_responses[1]:
-        rd_response = initial_responses[1]
+    if initial_responses.rd_response:
+        rd_response = initial_responses.rd_response
         all_rd_alert_hits.extend(rd_response.hits)
-    if initial_responses[2]:
-        d_response = initial_responses[2]
+    if initial_responses.d_response:
+        d_response = initial_responses.d_response
         all_d_alert_hits.extend(d_response.hits)
 
-    if main_total_hits > settings.ELASTICSEARCH_PAGINATION_BATCH_SIZE:
-        alerts_retrieved = main_alerts_returned
-        main_search_after = main_response.hits[-1].meta.sort
-        rd_search_after = (
-            rd_response.hits[-1].meta.sort if rd_response else None
-        )
-        d_search_after = d_response.hits[-1].meta.sort if d_response else None
-        while True:
-            search_after_params = {
-                "main_search_after": main_search_after,
-                "rd_search_after": rd_search_after,
-                "d_search_after": d_search_after,
-            }
-            responses = percolate_es_document(*args, **search_after_params)
-            if not responses[0]:
-                break
-
-            all_main_alert_hits.extend(responses[0].hits)
-            main_alerts_returned = len(responses[0].hits.hits)
-            alerts_retrieved += main_alerts_returned
-
-            if responses[1]:
-                all_rd_alert_hits.extend(responses[1].hits)
-            if responses[2]:
-                all_d_alert_hits.extend(responses[2].hits)
-            # Check if all results have been retrieved. If so break the loop
-            # Otherwise, increase search_after.
-            if (
-                alerts_retrieved >= main_total_hits
-                or main_alerts_returned == 0
-            ):
-                break
-            else:
-                main_search_after = responses[0].hits[-1].meta.sort
-                rd_search_after = (
-                    responses[1].hits[-1].meta.sort
-                    if responses[1] and len(responses[1].hits.hits)
-                    else None
-                )
-                d_search_after = (
-                    responses[2].hits[-1].meta.sort
-                    if responses[2] and len(responses[2].hits.hits)
-                    else None
-                )
+    if main_total_hits <= settings.ELASTICSEARCH_PAGINATION_BATCH_SIZE:
+        return all_main_alert_hits, all_rd_alert_hits, all_d_alert_hits
+
+    alerts_retrieved = main_alerts_returned
+    main_search_after = main_response.hits[-1].meta.sort
+    rd_search_after = rd_response.hits[-1].meta.sort if rd_response else None
+    d_search_after = d_response.hits[-1].meta.sort if d_response else None
+    while True:
+        search_after_params = {
+            "main_search_after": main_search_after,
+            "rd_search_after": rd_search_after,
+            "d_search_after": d_search_after,
+        }
+        responses = percolate_es_document(*args, **search_after_params)
+        if not responses.main_response:
+            break
+
+        all_main_alert_hits.extend(responses.main_response.hits)
+        main_alerts_returned = len(responses.main_response.hits.hits)
+        alerts_retrieved += main_alerts_returned
+
+        if responses.rd_response:
+            all_rd_alert_hits.extend(responses.rd_response.hits)
+        if responses.d_response:
+            all_d_alert_hits.extend(responses.d_response.hits)
+        # Check if all results have been retrieved. If so break the loop
+        # Otherwise, increase search_after.
+        if alerts_retrieved >= main_total_hits or main_alerts_returned == 0:
+            break
+        else:
+            main_search_after = responses.main_response.hits[-1].meta.sort
+            rd_search_after = (
+                responses.rd_response.hits[-1].meta.sort
+                if responses.rd_response and len(responses.rd_response.hits.hits)
+                else None
+            )
+            d_search_after = (
+                responses.d_response.hits[-1].meta.sort
+                if responses.d_response and len(responses.d_response.hits.hits)
+                else None
+            )
+
     return all_main_alert_hits, all_rd_alert_hits, all_d_alert_hits
 
 
@@ -605,15 +612,9 @@ def include_recap_document_hit(
     alert_in_rd_hits = alert_id in recap_document_hits
     alert_in_docket_hits = alert_id in docket_hits
 
-    if not alert_in_docket_hits and not alert_in_rd_hits:
-        return True
-    elif not alert_in_docket_hits and alert_in_rd_hits:
-        return True
-    elif alert_in_docket_hits and not alert_in_rd_hits:
+    if alert_in_docket_hits and not alert_in_rd_hits:
         return False
-    elif alert_in_docket_hits and alert_in_rd_hits:
-        return True
-    return False
+    return True
 
 
 def get_field_names(mapping_dict):
@@ -700,7 +701,6 @@ def prepare_percolator_content(app_label: str, document_id: str) -> tuple[
         "docket_slug",
         "docket_absolute_url",
         "court_exact",
-        "docket_child",
         "timestamp",
     }
     parent_document_content = select_es_document_fields(
diff --git a/cl/search/tests/tests_es_oral_arguments.py b/cl/search/tests/tests_es_oral_arguments.py
index 6301e31539..a0764b4bda 100644
--- a/cl/search/tests/tests_es_oral_arguments.py
+++ b/cl/search/tests/tests_es_oral_arguments.py
@@ -2254,9 +2254,9 @@ def test_percolator(self) -> None:
             oral_argument_index_alias,
         )
         expected_queries = 1
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id), True
+            self.confirm_query_matched(responses.main_response, query_id), True
         )
 
         cd = {
@@ -2274,9 +2274,9 @@ def test_percolator(self) -> None:
             oral_argument_index_alias,
         )
         expected_queries = 2
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id), True
+            self.confirm_query_matched(responses.main_response, query_id), True
         )
 
         cd = {
@@ -2293,9 +2293,9 @@ def test_percolator(self) -> None:
             oral_argument_index_alias,
         )
         expected_queries = 1
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id), True
+            self.confirm_query_matched(responses.main_response, query_id), True
         )
 
         cd = {
@@ -2314,9 +2314,9 @@ def test_percolator(self) -> None:
             oral_argument_index_alias,
         )
         expected_queries = 1
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id), True
+            self.confirm_query_matched(responses.main_response, query_id), True
         )
 
         cd = {
@@ -2336,9 +2336,9 @@ def test_percolator(self) -> None:
             oral_argument_index_alias,
         )
         expected_queries = 2
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id), True
+            self.confirm_query_matched(responses.main_response, query_id), True
         )
 
         cd = {
@@ -2357,9 +2357,9 @@ def test_percolator(self) -> None:
             oral_argument_index_alias,
         )
         expected_queries = 3
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id), True
+            self.confirm_query_matched(responses.main_response, query_id), True
         )
 
         cd = {
@@ -2378,9 +2378,9 @@ def test_percolator(self) -> None:
             oral_argument_index_alias,
         )
         expected_queries = 2
-        self.assertEqual(len(responses[0]), expected_queries)
+        self.assertEqual(len(responses.main_response), expected_queries)
         self.assertEqual(
-            self.confirm_query_matched(responses[0], query_id), True
+            self.confirm_query_matched(responses.main_response, query_id), True
         )
 
         self.delete_documents_from_index(
diff --git a/cl/search/types.py b/cl/search/types.py
index 3f4d464f96..17c5820a8a 100644
--- a/cl/search/types.py
+++ b/cl/search/types.py
@@ -3,7 +3,7 @@
 from enum import StrEnum
 from typing import Any, Literal, Type, Union
 
-from elasticsearch_dsl.response import Hit
+from elasticsearch_dsl.response import Hit, Response
 from elasticsearch_dsl.utils import AttrList
 
 from cl.alerts.models import Alert
@@ -104,7 +104,7 @@
 
 
 @dataclass
-class PercolatorResponsesType:
+class SendAlertsResponse:
     main_alerts_triggered: list[Hit]
     rd_alerts_triggered: list[Hit]
     d_alerts_triggered: list[Hit]
@@ -112,6 +112,13 @@ class PercolatorResponsesType:
     app_label_model: str
 
 
+@dataclass
+class PercolatorResponses:
+    main_response: Response
+    rd_response: Response | None
+    d_response: Response | None
+
+
 # TODO: Remove after scheduled OA alerts have been processed.
 SaveDocumentResponseType = tuple[str, ESDictDocument]
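
Usage sketch (illustrative only, not part of the commit): callers now read named attributes on the two dataclasses instead of indexing a tuple. The helper name summarize_percolator_hits below is hypothetical; the PercolatorResponses fields and the len(...) calls mirror how the patch's own tests consume the responses.

    # Illustrative sketch, assuming the dataclasses introduced above.
    from cl.search.types import PercolatorResponses

    def summarize_percolator_hits(responses: PercolatorResponses) -> dict[str, int]:
        """Count matched percolator queries, tolerating the optional responses."""
        return {
            "main": len(responses.main_response),
            "rd": len(responses.rd_response) if responses.rd_response else 0,
            "d": len(responses.d_response) if responses.d_response else 0,
        }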