diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py new file mode 100644 index 0000000000..a38ff91f8c --- /dev/null +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -0,0 +1,730 @@ +import copy +import datetime +import time +import traceback +from typing import Any, Literal + +import pytz +from asgiref.sync import async_to_sync +from django.contrib.auth.models import User +from django.http import QueryDict +from django.utils import timezone +from elasticsearch import Elasticsearch +from elasticsearch.exceptions import RequestError, TransportError +from elasticsearch_dsl import connections +from elasticsearch_dsl.response import Hit, Response +from elasticsearch_dsl.utils import AttrList +from redis import Redis + +from cl.alerts.models import Alert, ScheduledAlertHit +from cl.alerts.tasks import send_search_alert_emails +from cl.alerts.utils import ( + TaskCompletionStatus, + add_document_hit_to_alert_set, + alert_hits_limit_reached, + has_document_alert_hit_been_triggered, +) +from cl.api.models import WebhookEventType +from cl.api.tasks import send_search_alert_webhook_es +from cl.lib.command_utils import VerboseCommand, logger +from cl.lib.date_time import dt_as_local_date +from cl.lib.elasticsearch_utils import do_es_sweep_alert_query +from cl.lib.redis_utils import get_redis_interface +from cl.search.documents import ( + DocketDocument, + ESRECAPSweepDocument, + RECAPSweepDocument, +) +from cl.search.exception import ( + BadProximityQuery, + UnbalancedParenthesesQuery, + UnbalancedQuotesQuery, +) +from cl.search.models import SEARCH_TYPES, Docket +from cl.stats.utils import tally_stat +from cl.users.models import UserProfile + + +def get_task_status(task_id: str, es: Elasticsearch) -> dict[str, Any]: + """Fetch the status of a task from Elasticsearch. + + :param task_id: The ID of the task to fetch the status for. + :param es: The Elasticsearch client instance. + :return: The status of the task if successful, or an empty dictionary if + an error occurs. + """ + try: + return es.tasks.get(task_id=task_id) + except ( + TransportError, + ConnectionError, + RequestError, + ) as e: + logger.error("Error getting sweep alert index task status: %s", e) + return {} + + +def compute_estimated_remaining_time( + initial_wait: float, task_status: TaskCompletionStatus +) -> float: + """Compute the estimated remaining time for the re_index task to complete. + + :param initial_wait: The default wait time in seconds. + :param task_status: An instance of `TaskCompletionStatus` containing task + information. + :return: The estimated remaining time in seconds. If the start time, + created, or total are invalid, the initial default time is returned. + """ + + if ( + task_status.start_time_millis is None + or not task_status.created + or not task_status.total + ): + return initial_wait + + start_time = datetime.datetime.fromtimestamp( + task_status.start_time_millis / 1000.0 + ) + time_now = datetime.datetime.now() + estimated_time_remaining = max( + datetime.timedelta( + seconds=( + (time_now - start_time).total_seconds() / task_status.created + ) + * (task_status.total - task_status.created) + ).total_seconds(), + initial_wait, + ) + + return estimated_time_remaining + + +def retrieve_task_info(task_info: dict[str, Any]) -> TaskCompletionStatus: + """Retrieve task information from the given task dict. + + :param task_info: A dictionary containing the task status information. 
+ :return: A `TaskCompletionStatus` object representing the extracted task + information. + """ + + if task_info: + status = task_info["task"]["status"] + return TaskCompletionStatus( + completed=task_info["completed"], + created=status["created"], + total=status["total"], + start_time_millis=task_info["task"]["start_time_in_millis"], + ) + return TaskCompletionStatus() + + +def index_daily_recap_documents( + r: Redis, + source_index_name: str, + target_index: type[RECAPSweepDocument] | type[ESRECAPSweepDocument], + testing: bool = False, + only_rd: bool = False, +) -> int: + """Index Dockets added/modified during the day along with all their + RECAPDocuments, and RECAPDocuments added/modified during the day along + with their parent Dockets. It uses the ES re_index API. + + :param r: Redis client instance. + :param source_index_name: The source Elasticsearch index name from which + documents will be queried. + :param target_index: The target Elasticsearch index to which documents will + be re-indexed. + :param testing: Boolean flag for testing mode. + :param only_rd: Whether to reindex only RECAPDocuments into the + ESRECAPSweepDocument index. + :return: The total number of documents re-indexed. + """ + + if r.exists("alert_sweep:main_re_index_completed"): + logger.info( + "The main re-index task has been completed and will be omitted." + ) + # The main re-indexing has been completed for the day. Skip it and + # proceed with the RECAPDocument re-index. + return 0 + + if r.exists("alert_sweep:rd_re_index_completed"): + logger.info( + "The RECAPDocument only re-index task has been completed and will " + "be omitted." + ) + # The RECAPDocument re-indexing has been completed for the day. Skip + # it and proceed with sending alerts. + return 0 + + if not r.exists("alert_sweep:query_date"): + # In case of a failure, store the date when alerts should be queried in + # Redis, so the command can be resumed. + local_now = timezone.localtime().replace(tzinfo=None) + local_midnight = local_now.replace( + hour=0, minute=0, second=0, microsecond=0 + ) + r.set("alert_sweep:query_date", local_midnight.isoformat()) + + else: + # If "alert_sweep:query_date" already exists, get it from Redis. + local_midnight_str: str = str(r.get("alert_sweep:query_date")) + local_midnight = datetime.datetime.fromisoformat(local_midnight_str) + logger.info(f"Resuming re-indexing process for date: {local_midnight}") + + es = connections.get_connection() + # Convert the local midnight time to UTC. + local_timezone = pytz.timezone(timezone.get_current_timezone_name()) + local_midnight_localized = local_timezone.localize(local_midnight) + local_midnight_utc = local_midnight_localized.astimezone(pytz.utc) + next_day_utc = local_midnight_utc + datetime.timedelta(days=1) + + today_datetime_iso = local_midnight_utc.isoformat().replace("+00:00", "Z") + next_day_utc_iso = next_day_utc.isoformat().replace("+00:00", "Z") + # Reindex API query.
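The dict literal that follows pairs plain range filters on `timestamp` with `has_parent`/`has_child` clauses over the `docket_child` join field, so a change on either side of the parent/child relationship pulls the whole family into the sweep index. A minimal sketch of the same four clauses using elasticsearch_dsl's `Q` helper (illustrative only; the command keeps the raw dict so it can be handed straight to the Reindex API, and the ISO bounds below are placeholders for today_datetime_iso/next_day_utc_iso):

from elasticsearch_dsl import Q

day = {"gte": "2024-08-19T07:00:00Z", "lt": "2024-08-20T07:00:00Z"}  # placeholder bounds
touched_today = Q("range", timestamp=day)
sweep_query = Q(
    "bool",
    should=[
        touched_today & Q("term", docket_child="docket"),  # Dockets changed today
        Q("has_parent", parent_type="docket", query=touched_today),  # RDs whose Docket changed
        touched_today & Q("term", docket_child="recap_document"),  # RDs changed today
        Q("has_child", type="recap_document", query=touched_today),  # Dockets whose RDs changed
    ],
)
# sweep_query.to_dict() produces the same shape as the dict literal that follows.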
+ query = ( + { + "bool": { + "should": [ + # Dockets added/modified today + { + "bool": { + "must": [ + { + "range": { + "timestamp": { + "gte": today_datetime_iso, + "lt": next_day_utc_iso, + } + } + }, + {"term": {"docket_child": "docket"}}, + ] + } + }, + # RECAPDocuments with parents added/modified today + { + "has_parent": { + "parent_type": "docket", + "query": { + "range": { + "timestamp": { + "gte": today_datetime_iso, + "lt": next_day_utc_iso, + } + } + }, + } + }, + # RECAPDocuments added/modified today + { + "bool": { + "must": [ + { + "range": { + "timestamp": { + "gte": today_datetime_iso, + "lt": next_day_utc_iso, + } + } + }, + {"term": {"docket_child": "recap_document"}}, + ] + } + }, + # Dockets that are parents of RECAPDocuments added/modified today + { + "has_child": { + "type": "recap_document", + "query": { + "range": { + "timestamp": { + "gte": today_datetime_iso, + "lt": next_day_utc_iso, + } + } + }, + } + }, + ] + } + } + if not only_rd + else { + "bool": { + "should": [ + # RECAPDocuments with parents added/modified today + { + "has_parent": { + "parent_type": "docket", + "query": { + "range": { + "timestamp": { + "gte": today_datetime_iso, + "lt": next_day_utc_iso, + } + } + }, + } + }, + # RECAPDocuments added/modified today + { + "bool": { + "must": [ + { + "range": { + "timestamp": { + "gte": today_datetime_iso, + "lt": next_day_utc_iso, + } + } + }, + {"term": {"docket_child": "recap_document"}}, + ] + } + }, + ] + } + } + ) + + if not r.exists("alert_sweep:task_id"): + # Remove the index from the previous day and create a new one. + target_index._index.delete(ignore=404) + target_index.init() + target_index_name = target_index._index._name + + # In case of a failure, store the task_id in Redis so the command + # can be resumed. + params = { + "source": {"index": source_index_name, "query": query}, + "dest": {"index": target_index_name}, + "wait_for_completion": False, + "refresh": True, + } + if only_rd: + # Re-index only RECAPDocument fields to the ESRECAPSweepDocument + # index. + params["script"] = { + "source": """ + def fields = [ + 'id', + 'docket_entry_id', + 'description', + 'entry_number', + 'entry_date_filed', + 'short_description', + 'document_type', + 'document_number', + 'pacer_doc_id', + 'plain_text', + 'attachment_number', + 'is_available', + 'page_count', + 'filepath_local', + 'absolute_url', + 'cites' + ]; + ctx._source.keySet().retainAll(fields); + """ + } + response = es.reindex(**params) + # Store the task ID in Redis. + task_id = response["task"] + r.set("alert_sweep:task_id", task_id) + logger.info(f"Re-indexing task scheduled ID: {task_id}") + else: + task_id = r.get("alert_sweep:task_id") + logger.info(f"Resuming re-index task ID: {task_id}") + + initial_wait = 0.01 if testing else 60.0 + time.sleep(initial_wait) + task_info = retrieve_task_info(get_task_status(task_id, es)) + iterations_count = 0 + estimated_time_remaining = compute_estimated_remaining_time( + initial_wait, task_info + ) + while not task_info.completed: + logger.info( + f"Task progress: {task_info.created}/{task_info.total} documents. " + f"Estimated time to finish: {estimated_time_remaining} seconds."
+ ) + task_status = get_task_status(task_id, es) + task_info = retrieve_task_info(task_status) + time.sleep(estimated_time_remaining) + if task_info and not task_info.completed: + estimated_time_remaining = compute_estimated_remaining_time( + initial_wait, task_info + ) + if not task_info: + iterations_count += 1 + if iterations_count > 10: + logger.error( + "Re-index alert sweep index task has failed: %s/%s", + task_info.created, + task_info.total, + ) + break + + r.delete("alert_sweep:task_id") + return task_info.total + + +def should_docket_hit_be_included( + r: Redis, alert_id: int, docket_id: int, query_date: datetime.date +) -> bool: + """Determine if a Docket alert should be triggered based on its + date_modified and whether the docket has triggered the alert previously. + + :param r: The Redis interface. + :param alert_id: The ID of the alert. + :param docket_id: The ID of the docket. + :param query_date: The daily re_index query date. + :return: True if the Docket alert should be triggered, False otherwise. + """ + docket = Docket.objects.filter(id=docket_id).only("date_modified").first() + if not docket: + return False + if not has_document_alert_hit_been_triggered(r, alert_id, "d", docket_id): + # Confirm the docket was modified during the day alerts are being + # sent for. This avoids triggering docket-only alerts when only + # related RECAPDocuments were indexed that day, since RDs also index + # docket fields that can match docket-only queries. + date_modified_localized = dt_as_local_date(docket.date_modified) + if date_modified_localized == query_date: + return True + return False + + +def filter_rd_alert_hits( + r: Redis, + alert_id: int, + rd_hits: AttrList, + rd_ids: list[int], + check_rd_matched: bool = False, +): + """Filter RECAP document hits based on specified conditions. + + :param r: The Redis interface. + :param alert_id: The ID of the alert. + :param rd_hits: A list of RECAPDocument hits to be processed. + :param rd_ids: A list of RECAPDocument IDs that matched the RECAPDocument + only query. + :param check_rd_matched: A boolean indicating whether to check if the RECAP + document hit from the main query also matches the RECAPDocument-only query. + :return: A list of RECAP document hits that meet all specified conditions. + """ + + rds_to_send = [] + for rd_hit in rd_hits: + conditions = [ + not has_document_alert_hit_been_triggered( + r, alert_id, "r", rd_hit["_source"]["id"] + ) + ] + if check_rd_matched: + # Add condition to check if the RD hit is within the RD IDs returned + # by the RECAPDocument-only query.
+ conditions.append(rd_hit["_source"]["id"] in rd_ids) + if all(conditions): + rds_to_send.append(rd_hit) + add_document_hit_to_alert_set( + r, alert_id, "r", rd_hit["_source"]["id"] + ) + return rds_to_send + + +def query_alerts( + search_params: QueryDict, +) -> tuple[list[Hit] | None, Response | None, Response | None]: + """Run the sweep index alert query for the given search parameters. + + :param search_params: The QueryDict containing the alert's query params. + :return: A three-tuple: the main query hits, the docket-only Response, and + the RECAPDocument-only Response, or (None, None, None) if the query failed. + """ + try: + search_query = RECAPSweepDocument.search() + child_search_query = ESRECAPSweepDocument.search() + return do_es_sweep_alert_query( + search_query, + child_search_query, + search_params, + ) + except ( + UnbalancedParenthesesQuery, + UnbalancedQuotesQuery, + BadProximityQuery, + TransportError, + ConnectionError, + RequestError, + ): + traceback.print_exc() + logger.info(f"Search for this alert failed: {search_params}\n") + return None, None, None + + +def process_alert_hits( + r: Redis, + results: list[Hit], + parent_results: Response | None, + child_results: Response | None, + alert_id: int, + query_date: datetime.date, +) -> list[Hit]: + """Process alert hits by filtering and preparing the results to send based + on alert conditions. + + :param r: The Redis instance. + :param results: A list of Hit objects containing search results. + :param parent_results: The ES Response for the docket-only query. + :param child_results: The ES Response for the RECAPDocument-only query. + :param alert_id: The ID of the alert being processed. + :param query_date: The daily re_index query date. + :return: A list of Hit objects that are filtered and prepared to be sent. + """ + docket_hits = parent_results.hits if parent_results else [] + docket_ids = [int(d.docket_id) for d in docket_hits] + + rd_hits = child_results.hits if child_results else [] + rd_ids = [int(r.id) for r in rd_hits] + results_to_send = [] + if len(results) > 0: + for hit in results: + if hit.docket_id in docket_ids: + # Possible Docket-only alert + rds_to_send = filter_rd_alert_hits( + r, + alert_id, + hit["child_docs"], + rd_ids, + check_rd_matched=True, + ) + if rds_to_send: + # Docket OR RECAPDocument alert. + hit["child_docs"] = rds_to_send + results_to_send.append(hit) + if should_docket_hit_be_included( + r, alert_id, hit.docket_id, query_date + ): + add_document_hit_to_alert_set( + r, alert_id, "d", hit.docket_id + ) + + elif should_docket_hit_be_included( + r, alert_id, hit.docket_id, query_date + ): + # Docket-only alert + hit["child_docs"] = [] + results_to_send.append(hit) + add_document_hit_to_alert_set( + r, alert_id, "d", hit.docket_id + ) + else: + # RECAPDocument-only alerts or cross-object alerts + rds_to_send = filter_rd_alert_hits( + r, alert_id, hit["child_docs"], rd_ids + ) + if rds_to_send: + hit["child_docs"] = rds_to_send + results_to_send.append(hit) + return results_to_send + + +def send_search_alert_webhooks( + user: UserProfile.user, results_to_send: list[Hit], alert_id: int +) -> None: + """Send webhook events for search alerts if the user has SEARCH_ALERT + endpoints enabled. + + :param user: The user object whose webhooks need to be checked. + :param results_to_send: A list of Hit objects that contain the search + results to be sent. + :param alert_id: The Alert ID to be sent in the webhook.
+ """ + user_webhooks = user.webhooks.filter( + event_type=WebhookEventType.SEARCH_ALERT, enabled=True + ) + for user_webhook in user_webhooks: + send_search_alert_webhook_es.delay( + results_to_send, user_webhook.pk, alert_id + ) + + +def query_and_send_alerts( + r: Redis, rate: Literal["rt", "dly"], query_date: datetime.date +) -> None: + """Query the sweep index and send alerts based on the specified rate + and date. + + :param r: The Redis interface. + :param rate: The rate at which to query alerts. + :param query_date: The daily re_index query date. + :return: None. + """ + + alert_users: UserProfile.user = User.objects.filter( + alerts__rate=rate + ).distinct() + alerts_sent_count = 0 + now_time = datetime.datetime.now() + for user in alert_users: + if rate == Alert.REAL_TIME and not user.profile.is_member: + continue + alerts = user.alerts.filter(rate=rate, alert_type=SEARCH_TYPES.RECAP) + logger.info(f"Running alerts for user '{user}': {alerts}") + + hits = [] + alerts_to_update = [] + for alert in alerts: + search_params = QueryDict(alert.query.encode(), mutable=True) + results, parent_results, child_results = query_alerts( + search_params + ) + if not results: + continue + alerts_to_update.append(alert.pk) + search_type = search_params.get("type", SEARCH_TYPES.RECAP) + results_to_send = process_alert_hits( + r, results, parent_results, child_results, alert.pk, query_date + ) + if not results_to_send: + continue + hits.append( + [ + alert, + search_type, + results_to_send, + len(results_to_send), + ] + ) + alert.query_run = search_params.urlencode() # type: ignore + alert.date_last_hit = timezone.now() + alert.save() + + # Send webhooks + send_search_alert_webhooks(user, results_to_send, alert.pk) + + if hits: + send_search_alert_emails.delay([(user.pk, hits)]) + alerts_sent_count += 1 + + # Update Alert's date_last_hit in bulk. + Alert.objects.filter(id__in=alerts_to_update).update( + date_last_hit=now_time + ) + async_to_sync(tally_stat)(f"alerts.sent.{rate}", inc=alerts_sent_count) + logger.info(f"Sent {alerts_sent_count} {rate} email alerts.") + + +def query_and_schedule_alerts( + r: Redis, rate: Literal["wly", "mly"], query_date: datetime.date +) -> None: + """Query the sweep index and schedule alerts based on the specified rate + and date. + + :param r: The Redis interface. + :param rate: The rate at which to query alerts. + :param query_date: The daily re_index query date. + :return: None. + """ + + alert_users = User.objects.filter(alerts__rate=rate).distinct() + for user in alert_users: + alerts = user.alerts.filter(rate=rate, alert_type=SEARCH_TYPES.RECAP) + logger.info(f"Running '{rate}' alerts for user '{user}': {alerts}") + scheduled_hits_to_create = [] + for alert in alerts: + search_params = QueryDict(alert.query.encode(), mutable=True) + results, parent_results, child_results = query_alerts( + search_params + ) + if not results: + continue + + results_to_send = process_alert_hits( + r, results, parent_results, child_results, alert.pk, query_date + ) + if not results_to_send: + continue + for hit in results_to_send: + # Schedule DAILY, WEEKLY and MONTHLY Alerts + if alert_hits_limit_reached(alert.pk, user.pk): + # Skip storing hits for this alert-user combination because + # the SCHEDULED_ALERT_HITS_LIMIT has been reached. 
+ continue + + child_result_objects = [] + hit_copy = copy.deepcopy(hit) + if hasattr(hit_copy, "child_docs"): + for child_doc in hit_copy.child_docs: + child_result_objects.append( + child_doc["_source"].to_dict() + ) + hit_copy["child_docs"] = child_result_objects + scheduled_hits_to_create.append( + ScheduledAlertHit( + user=user, + alert=alert, + document_content=hit_copy.to_dict(), + ) + ) + # Send webhooks + send_search_alert_webhooks(user, results_to_send, alert.pk) + + # Create scheduled WEEKLY and MONTHLY Alerts in bulk. + if scheduled_hits_to_create: + ScheduledAlertHit.objects.bulk_create(scheduled_hits_to_create) + + +class Command(VerboseCommand): + """Query and re-index (into the RECAP sweep index) all the RECAP content + that has changed during the current period, along with its related + documents. Then use the RECAP sweep index to query and send real-time and + daily RECAP alerts. Finally, schedule weekly and monthly RECAP alerts. + """ + + help = "Send RECAP Search Alerts." + + def add_arguments(self, parser): + parser.add_argument( + "--testing-mode", + action="store_true", + help="Use this flag for testing purposes.", + ) + + def handle(self, *args, **options): + super().handle(*args, **options) + testing_mode = options.get("testing_mode", False) + r = get_redis_interface("CACHE") + index_daily_recap_documents( + r, + DocketDocument._index._name, + RECAPSweepDocument, + testing=testing_mode, + ) + if not testing_mode: + # Set the main_re_index_completed key so the main re_index task + # can be skipped when resuming after a failure. + r.set("alert_sweep:main_re_index_completed", 1, ex=3600 * 12) + index_daily_recap_documents( + r, + DocketDocument._index._name, + ESRECAPSweepDocument, + testing=testing_mode, + only_rd=True, + ) + if not testing_mode: + # Set the rd_re_index_completed key so the RECAPDocument re_index + # task can be skipped when resuming after a failure. + r.set("alert_sweep:rd_re_index_completed", 1, ex=3600 * 12) + + query_date = timezone.localtime( + timezone.make_aware( + datetime.datetime.fromisoformat( + str(r.get("alert_sweep:query_date")) + ) + ) + ).date() + query_and_send_alerts(r, Alert.REAL_TIME, query_date) + query_and_send_alerts(r, Alert.DAILY, query_date) + query_and_schedule_alerts(r, Alert.WEEKLY, query_date) + query_and_schedule_alerts(r, Alert.MONTHLY, query_date) + r.delete("alert_sweep:main_re_index_completed") + r.delete("alert_sweep:rd_re_index_completed") + r.delete("alert_sweep:query_date") diff --git a/cl/alerts/management/commands/cl_send_scheduled_alerts.py b/cl/alerts/management/commands/cl_send_scheduled_alerts.py index 8fefd675f3..e0b23c8420 100644 --- a/cl/alerts/management/commands/cl_send_scheduled_alerts.py +++ b/cl/alerts/management/commands/cl_send_scheduled_alerts.py @@ -99,7 +99,9 @@ def query_and_send_alerts_by_rate(rate: str) -> None: ) ) if hits: - send_search_alert_emails.delay([(user_id, hits)]) + send_search_alert_emails.delay( + [(user_id, hits)], scheduled_alert=True + ) alerts_sent_count += 1 # Update Alert's date_last_hit in bulk.
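The re-indexing step in the new command above relies on a resumable pattern worth spelling out: es.reindex() is fired asynchronously (wait_for_completion=False), the returned task id is parked in Redis so a crashed run can pick up where it left off, and the Tasks API is polled while sleeping for the estimated time remaining, computed as elapsed seconds per created document times the documents still pending. A condensed sketch of that loop (illustrative only: wait_for_reindex is not a function in this patch, and the Redis bookkeeping, failure counter, and logging are omitted):

import time

from elasticsearch import Elasticsearch


def wait_for_reindex(es: Elasticsearch, task_id: str, floor: float = 60.0) -> int:
    """Poll the Tasks API until an async reindex completes; return docs written."""
    while True:
        info = es.tasks.get(task_id=task_id)
        if info["completed"]:
            return info["task"]["status"]["total"]
        created = info["task"]["status"]["created"]
        total = info["task"]["status"]["total"]
        elapsed = time.time() - info["task"]["start_time_in_millis"] / 1000.0
        # Average seconds per document so far, times the documents still
        # pending; never sleep less than the floor to avoid hammering ES.
        sleep_for = max(elapsed / created * (total - created), floor) if created else floor
        time.sleep(sleep_for)


# Matching the call made in index_daily_recap_documents above:
# response = es.reindex(**params)  # params include wait_for_completion=False
# total = wait_for_reindex(es, response["task"])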
diff --git a/cl/alerts/management/commands/clean_up_search_alerts.py b/cl/alerts/management/commands/clean_up_search_alerts.py index b00d7128a3..df8a28b2d6 100644 --- a/cl/alerts/management/commands/clean_up_search_alerts.py +++ b/cl/alerts/management/commands/clean_up_search_alerts.py @@ -75,7 +75,8 @@ def validate_queries_syntax(options: OptionsType) -> None: if search_form.is_valid(): cd = search_form.cleaned_data try: - s, _ = build_es_base_query(search_query, cd) + es_queries = build_es_base_query(search_query, cd) + s = es_queries.search_query s = s.extra(size=0) s.execute().to_dict() # Waiting between requests to avoid hammering ES too quickly. diff --git a/cl/alerts/tasks.py b/cl/alerts/tasks.py index da43d8155d..e3aad0a071 100644 --- a/cl/alerts/tasks.py +++ b/cl/alerts/tasks.py @@ -25,7 +25,7 @@ from cl.api.models import WebhookEventType from cl.api.tasks import ( send_docket_alert_webhook_events, - send_es_search_alert_webhook, + send_search_alert_webhook_es, ) from cl.celery_init import app from cl.custom_filters.templatetags.text_filters import best_case_name @@ -458,22 +458,25 @@ def send_webhook_alert_hits( event_type=WebhookEventType.SEARCH_ALERT, enabled=True ) for user_webhook in user_webhooks: - send_es_search_alert_webhook.delay( + send_search_alert_webhook_es.delay( documents, user_webhook.pk, - alert, + alert.pk, ) @app.task(ignore_result=True) def send_search_alert_emails( - email_alerts_to_send: list[tuple[int, list[SearchAlertHitType]]] + email_alerts_to_send: list[tuple[int, list[SearchAlertHitType]]], + scheduled_alert: bool = False, ) -> None: """Send search alert emails for multiple users. :param email_alerts_to_send: A list of two tuples containing the user to whom the alerts should be sent. A list of tuples containing the Search Alert, (Alert, search type, documents, and number of documents) + :param scheduled_alert: A boolean indicating whether this alert has been + scheduled. :return: None """ @@ -491,6 +494,7 @@ def send_search_alert_emails( context = { "hits": hits, "hits_limit": settings.SCHEDULED_ALERT_HITS_LIMIT, + "scheduled_alert": scheduled_alert, } headers = {} query_string = "" diff --git a/cl/alerts/templates/alert_email_es.html b/cl/alerts/templates/alert_email_es.html index 36ccee5ccc..dc2f797268 100644 --- a/cl/alerts/templates/alert_email_es.html +++ b/cl/alerts/templates/alert_email_es.html @@ -25,7 +25,7 @@

- Your {{alert.get_rate_display|lower}} {% if type == 'o' %}opinion{% elif type == 'oa' %}oral argument{% endif %} alert — {{alert.name}} — had {{num_results}}{% if num_results >= hits_limit %}+{% endif %} hit{{results|pluralize}}: + Your {{alert.get_rate_display|lower}} {% if type == 'o' %}opinion{% elif type == 'oa' %}oral argument{% elif type == 'r' %}RECAP{% endif %} alert — {{alert.name}} — had {{num_results}}{% if num_results >= hits_limit %}+{% endif %} hit{{results|pluralize}}:
View Full Results / Edit this Alert
@@ -34,28 +34,66 @@
- + {{ forloop.counter }}. {{ result|get_highlight:"caseName"|safe }} - ({% if result.court_id != 'scotus' %}{{ result|get_highlight:"court_citation_string"|nbsp|safe }} {% endif %}{% if type == 'o' %}{{ result.dateFiled|date:"Y" }}{% elif type == 'oa' %}{{ result.dateArgued|date:"Y" }}{% endif %}) + ({% if result.court_id != 'scotus' %}{{ result|get_highlight:"court_citation_string"|nbsp|safe }} {% endif %}{% if type == 'o' or type == 'r' %}{{ result.dateFiled|date:"Y" }}{% elif type == 'oa' %}{{ result.dateArgued|date:"Y" }}{% endif %})
-
- - View original: - - {% if result.download_url %} - - From the court + {% if type == 'r' %} + {% if result.docketNumber %} + Docket Number: + {{ result|get_highlight:"docketNumber"|safe }} + {% endif %} +
+ Date Filed: + {% if result.dateFiled %} + {{ result.dateFiled|date:"F jS, Y" }} + {% else %} + Unknown Date + {% endif %} +
+ {% if result.child_docs and result.child_remaining %} + {% extract_q_value alert.query_run as q_value %} + + View Additional Results for this Case + +
+ {% endif %} + {% else %} +
+ + View original: + + {% if result.download_url %} + + From the court + +   |   + {% endif %} + {% if result.local_path %} + {# Provide link to S3. #} + + Our backup -   |   {% endif %} - {% if result.local_path %} - {# Provide link to S3. #} - - Our backup - +
{% endif %} -
{% if type == 'oa' %}
Date Argued: diff --git a/cl/alerts/templates/alert_email_es.txt b/cl/alerts/templates/alert_email_es.txt index db8363e713..2b7ec3b569 100644 --- a/cl/alerts/templates/alert_email_es.txt +++ b/cl/alerts/templates/alert_email_es.txt @@ -10,16 +10,23 @@ CourtListener.com We have news regarding your alerts at CourtListener.com ------------------------------------------------------- -{% for alert, type, results, num_results in hits %}{% for result in results %}{% if forloop.first %}Your {{alert.get_rate_display|lower}} {% if type == 'o' %}opinion{% elif type == 'oa' %}oral argument{% endif %} alert -- {{alert.name}} -- had {{num_results}}{% if num_results >= hits_limit %}+{% endif %} hit{{results|pluralize}}: +{% for alert, type, results, num_results in hits %}{% for result in results %}{% if forloop.first %}Your {{alert.get_rate_display|lower}} {% if type == 'o' %}opinion{% elif type == 'oa' %}oral argument{% elif type == 'r' %}RECAP{% endif %} alert -- {{alert.name}} -- had {{num_results}}{% if num_results >= hits_limit %}+{% endif %} hit{{results|pluralize}}: ------------------------------------------------------- View Full Results / Edit this Alert: https://www.courtlistener.com/?{{ alert.query_run|safe }}&edit_alert={{ alert.pk }} Disable this Alert (one click): https://www.courtlistener.com{% url "disable_alert" alert.secret_key %}{% endif %} -{{forloop.counter}}. {{ result.caseName|render_string_or_list|safe|striptags }} ({% if result.court_id != 'scotus' %}{{ result.court_citation_string|render_string_or_list|striptags }} {% endif %}{% if type == 'o' %}{{ result.dateFiled|date:"Y" }}{% elif type == 'oa' %}{{ result.dateArgued|date:"Y" }}{% endif %}) +{{forloop.counter}}. {{ result.caseName|render_string_or_list|safe|striptags }} ({% if result.court_id != 'scotus' %}{{ result.court_citation_string|render_string_or_list|striptags }} {% endif %}{% if type == 'o' or type == 'r' %}{{ result.dateFiled|date:"Y" }}{% elif type == 'oa' %}{{ result.dateArgued|date:"Y" }}{% endif %}) {% if type == 'oa' %}{% if result.dateArgued %}Date Argued: {{ result.dateArgued|date:"F jS, Y" }}{% else %}Date Argued: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %} | Duration: {{ result.duration|naturalduration }}{% if result.judge %} | Judge: {{ result.judge|render_string_or_list|safe|striptags|underscore_to_space }}{% endif %}{% endif %} -{% if type == 'o' or type == 'oa' %}{% if result|get_highlight:"text" %}...{{ result|get_highlight:"text"|safe|striptags|underscore_to_space|compress_whitespace }}...{% endif %} +{% if type == 'o' or type == 'oa' %}{% if result|get_highlight:"text" %}...{{ result|get_highlight:"text"|safe|striptags|underscore_to_space|compress_whitespace }}...{% endif %}{% endif %} +{% if type == 'r' %}{% if result.dateFiled %}Date Filed: {{ result.dateFiled|date:"F jS, Y" }}{% else %}Date Filed: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %} +{% for doc in result.child_docs %}{% with doc=doc|get_es_doc_content:scheduled_alert %} - {% if doc.short_description %}{{ doc.short_description|render_string_or_list|safe|striptags }} - {% endif %}Document #{% if doc.document_number %}{{ doc.document_number }}{% endif %}{% if doc.attachment_number %}, Attachment #{{ doc.attachment_number }}{% endif %} + {% if doc.description %}Description: {{ doc.description|render_string_or_list|safe|striptags }}{% endif %} + {% 
if doc.plain_text %}{% contains_highlights doc.plain_text.0 True as highlighted %}{% if highlighted %}...{% endif %}{{ doc.plain_text|render_string_or_list|safe|striptags|underscore_to_space }}...{% endif %} + View this document on our site: https://www.courtlistener.com{% if doc.absolute_url %}{{ doc.absolute_url }}{% else %}{{ result.docket_absolute_url }}#minute-entry-{{ doc.docket_entry_id }}{% endif %} +{% endwith %}{% endfor %} +{% if result.child_docs and result.child_remaining %}{% extract_q_value alert.query_run as q_value %}View Additional Results for this Case: https://www.courtlistener.com/?type={{ type|urlencode }}&q={% if q_value %}({{ q_value|urlencode }})%20AND%20{% endif %}docket_id%3A{{ result.docket_id|urlencode }}{% endif %} {% endif %}~~~~~ - - View this item on our site: https://www.courtlistener.com{{result.absolute_url}} + - View this item on our site: https://www.courtlistener.com{% if type == 'r' %}{{result.docket_absolute_url}}{% else %}{{result.absolute_url}}{% endif %} {% if result.download_url %} - Download original from the court: {{result.download_url}} {% endif %}{% if result.local_path %} - Download the original from our backup: https://storage.courtlistener.com/{{ result.local_path }}{% endif %}{% endfor %} diff --git a/cl/alerts/tests/__init__.py b/cl/alerts/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cl/alerts/tests.py b/cl/alerts/tests/tests.py similarity index 100% rename from cl/alerts/tests.py rename to cl/alerts/tests/tests.py diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py new file mode 100644 index 0000000000..8e57da973a --- /dev/null +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -0,0 +1,1627 @@ +import datetime +from unittest import mock + +import time_machine +from asgiref.sync import sync_to_async +from django.core import mail +from django.core.management import call_command +from django.test.utils import override_settings +from django.urls import reverse +from django.utils.html import strip_tags +from django.utils.timezone import now +from elasticsearch_dsl import Q +from lxml import html + +from cl.alerts.factories import AlertFactory +from cl.alerts.management.commands.cl_send_recap_alerts import ( + index_daily_recap_documents, +) +from cl.alerts.models import SEARCH_TYPES, Alert, ScheduledAlertHit +from cl.api.factories import WebhookFactory +from cl.api.models import WebhookEvent, WebhookEventType +from cl.donate.models import NeonMembership +from cl.lib.elasticsearch_utils import do_es_sweep_alert_query +from cl.lib.redis_utils import get_redis_interface +from cl.lib.test_helpers import RECAPSearchTestCase +from cl.search.documents import DocketDocument, RECAPSweepDocument +from cl.search.factories import ( + DocketEntryWithParentsFactory, + DocketFactory, + RECAPDocumentFactory, +) +from cl.search.models import Docket +from cl.tests.cases import ESIndexTestCase, TestCase +from cl.tests.utils import MockResponse +from cl.users.factories import UserProfileWithParentsFactory + + +class RECAPAlertsSweepIndexTest( + RECAPSearchTestCase, ESIndexTestCase, TestCase +): + """ + RECAP Alerts Sweep Index Tests + """ + + @classmethod + def setUpTestData(cls): + cls.rebuild_index("people_db.Person") + cls.rebuild_index("search.Docket") + cls.mock_date = now() + with time_machine.travel(cls.mock_date, tick=False): + super().setUpTestData() + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + 
testing_mode=True, + ) + + cls.user_profile = UserProfileWithParentsFactory() + NeonMembership.objects.create( + level=NeonMembership.LEGACY, user=cls.user_profile.user + ) + cls.user_profile_2 = UserProfileWithParentsFactory() + NeonMembership.objects.create( + level=NeonMembership.LEGACY, user=cls.user_profile_2.user + ) + cls.user_profile_no_member = UserProfileWithParentsFactory() + cls.webhook_enabled = WebhookFactory( + user=cls.user_profile.user, + event_type=WebhookEventType.SEARCH_ALERT, + url="https://example.com/", + enabled=True, + ) + + def setUp(self): + self.r = get_redis_interface("CACHE") + self.r.delete("alert_sweep:query_date") + self.r.delete("alert_sweep:task_id") + keys = self.r.keys("alert_hits:*") + if keys: + self.r.delete(*keys) + + @staticmethod + def get_html_content_from_email(email_content): + html_content = None + for content, content_type in email_content.alternatives: + if content_type == "text/html": + html_content = content + break + return html_content + + def _confirm_number_of_alerts(self, html_content, expected_count): + """Test the number of alerts included in the email alert.""" + tree = html.fromstring(html_content) + got = len(tree.xpath("//h2")) + + self.assertEqual( + got, + expected_count, + msg="Did not get the right number of alerts in the email. " + "Expected: %s - Got: %s\n\n" % (expected_count, got), + ) + + @staticmethod + def _extract_cases_from_alert(html_tree, alert_title): + """Extract the case elements (h3) under a specific alert (h2) from the + HTML tree. + """ + alert_element = html_tree.xpath( + f"//h2[contains(text(), '{alert_title}')]" + ) + h2_elements = html_tree.xpath("//h2") + alert_index = h2_elements.index(alert_element[0]) + # Find the

<h3> elements between this <h2> and the next <h2>
+ if alert_index + 1 < len(h2_elements): + next_alert_element = h2_elements[alert_index + 1] + alert_cases = html_tree.xpath( + f"//h2[contains(text(), '{alert_title}')]/following-sibling::*[following-sibling::h2[1] = '{next_alert_element.text}'][self::h3]" + ) + else: + alert_cases = html_tree.xpath( + f"//h2[contains(text(), '{alert_title}')]/following-sibling::h3" + ) + return alert_cases + + def _count_alert_hits_and_child_hits( + self, + html_content, + alert_title, + expected_hits, + case_title, + expected_child_hits, + ): + """Confirm the following assertions for the email alert: + - A specific alert is included in the email alert. + - The specified alert contains the expected number of hits. + - The specified case contains the expected number of child hits. + """ + tree = html.fromstring(html_content) + alert_element = tree.xpath(f"//h2[contains(text(), '{alert_title}')]") + self.assertTrue( + alert_element, msg=f"No alert with title {alert_title} found." + ) + alert_cases = self._extract_cases_from_alert(tree, alert_title) + self.assertEqual( + len(alert_cases), + expected_hits, + msg="Did not get the right number of hits for the alert %s. " + "Expected: %s - Got: %s\n\n" + % (alert_title, expected_hits, len(alert_cases)), + ) + if case_title: + for case in alert_cases: + child_hit_count = 0 + case_text = " ".join( + [element.strip() for element in case.xpath(".//text()")] + ) + if case_title in case_text: + child_hit_count = len( + case.xpath("following-sibling::ul[1]/li/a") + ) + self.assertEqual( + child_hit_count, + expected_child_hits, + msg="Did not get the right number of child hits for the case %s. " + "Expected: %s - Got: %s\n\n" + % (case_title, expected_child_hits, child_hit_count), + ) + break + + def _assert_child_hits_content( + self, + html_content, + alert_title, + case_title, + expected_child_descriptions, + ): + """Confirm the child hits in a case are the expected ones, comparing + their descriptions. + """ + tree = html.fromstring(html_content) + alert_element = tree.xpath(f"//h2[contains(text(), '{alert_title}')]") + # Find the corresponding case_title under the alert_element + alert_cases = self._extract_cases_from_alert(tree, alert_title) + + def extract_child_descriptions(case_item): + child_documents = case_item.xpath("./following-sibling::ul[1]/li") + results = [] + for li in child_documents: + a_tag = li.xpath(".//a")[0] + full_text = a_tag.text_content() + first_part = full_text.split("\u2014")[0].strip() + results.append(first_part) + + return results + + child_descriptions = set() + for case in alert_cases: + case_text = "".join(case.xpath(".//text()")).strip() + if case_title in case_text: + child_descriptions = set(extract_child_descriptions(case)) + break + + self.assertEqual( + child_descriptions, + set(expected_child_descriptions), + msg=f"Child hits didn't match for case {case_title}. Got {child_descriptions}, expected {expected_child_descriptions}.", + ) + + def _count_webhook_hits_and_child_hits( + self, + webhooks, + alert_title, + expected_hits, + case_title, + expected_child_hits, + ): + """Confirm the following assertions for the search alert webhook: + - A specific alert webhook was triggered. + - The specified alert contains the expected number of hits. + - The specified case contains the expected number of child hits.
+ """ + + for webhook in webhooks: + if webhook["payload"]["alert"]["name"] == alert_title: + webhook_cases = webhook["payload"]["results"] + self.assertEqual( + len(webhook_cases), + expected_hits, + msg=f"Did not get the right number of hits for the alert %s. " + % alert_title, + ) + for case in webhook["payload"]["results"]: + if case_title == strip_tags(case["caseName"]): + self.assertEqual( + len(case["recap_documents"]), + expected_child_hits, + msg=f"Did not get the right number of child documents for the case %s. " + % case_title, + ) + + def test_filter_recap_alerts_to_send(self) -> None: + """Test filter RECAP alerts that met the conditions to be sent: + - RECAP type alert. + - RT or DLY rate + - For RT rate the user must have an active membership. + """ + + rt_recap_alert = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test RT RECAP Alert", + query='q="401 Civil"&type=r', + ) + dly_recap_alert = AlertFactory( + user=self.user_profile.user, + rate=Alert.DAILY, + name="Test DLY RECAP Alert", + query='q="401 Civil"&type=r', + ) + AlertFactory( + user=self.user_profile_2.user, + rate=Alert.REAL_TIME, + name="Test RT RECAP Alert", + query='q="401 Civil"', + ) + AlertFactory( + user=self.user_profile_no_member.user, + rate=Alert.REAL_TIME, + name="Test RT RECAP Alert no Member", + query='q="401 Civil"&type=r', + ) + + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ): + call_command("cl_send_recap_alerts", testing_mode=True) + + # Only the RECAP RT alert for a member and the RECAP DLY alert are sent. + self.assertEqual( + len(mail.outbox), 2, msg="Outgoing emails don't match." + ) + html_content = self.get_html_content_from_email(mail.outbox[0]) + self.assertIn(rt_recap_alert.name, html_content) + + html_content = self.get_html_content_from_email(mail.outbox[1]) + self.assertIn(dly_recap_alert.name, html_content) + + def test_index_daily_recap_documents(self) -> None: + """Test index_daily_recap_documents method over different documents + conditions. + """ + RECAPSweepDocument._index.delete(ignore=404) + RECAPSweepDocument.init() + recap_search = DocketDocument.search() + recap_dockets = recap_search.query(Q("match", docket_child="docket")) + self.assertEqual(recap_dockets.count(), 2) + + recap_documents = recap_search.query( + Q("match", docket_child="recap_document") + ) + self.assertEqual(recap_documents.count(), 3) + + sweep_search = RECAPSweepDocument.search() + self.assertEqual( + sweep_search.count(), + 0, + msg="Wrong number of documents in the sweep index.", + ) + + # Index documents based Dockets changed today + all their + # RECAPDocuments indexed the same day. + with time_machine.travel(self.mock_date, tick=False): + documents_indexed = index_daily_recap_documents( + self.r, + DocketDocument._index._name, + RECAPSweepDocument, + testing=True, + ) + self.assertEqual( + documents_indexed, 5, msg="Wrong number of documents indexed." 
+ ) + + sweep_search = RECAPSweepDocument.search() + dockets_sweep = sweep_search.query(Q("match", docket_child="docket")) + self.assertEqual(dockets_sweep.count(), 2) + + documents_sweep = sweep_search.query( + Q("match", docket_child="recap_document") + ) + self.assertEqual(documents_sweep.count(), 3) + + # Index Docket changed today + their RECAPDocuments indexed on + # previous days + with time_machine.travel(self.mock_date, tick=False): + docket = DocketFactory( + court=self.court, + case_name="SUBPOENAS SERVED CASE", + docket_number="1:21-bk-1234", + source=Docket.RECAP, + ) + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + ) + + # Its related RD is ingested two days before. + two_days_before = now() - datetime.timedelta(days=2) + mock_two_days_before = two_days_before.replace(hour=5) + with time_machine.travel(mock_two_days_before, tick=False): + alert_de = DocketEntryWithParentsFactory( + docket=docket, + entry_number=1, + date_filed=datetime.date(2024, 8, 19), + description="MOTION for Leave to File Amicus Curiae Lorem Served", + ) + rd = RECAPDocumentFactory( + docket_entry=alert_de, + description="Motion to File", + document_number="1", + is_available=True, + ) + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + ) + + # Run the indexer. + with time_machine.travel(self.mock_date, tick=False): + documents_indexed = index_daily_recap_documents( + self.r, + DocketDocument._index._name, + RECAPSweepDocument, + testing=True, + ) + self.assertEqual( + documents_indexed, 7, msg="Wrong number of documents indexed." + ) + + # Index a RECAPDocument changed today including its parent Docket + # indexed on previous days. + with time_machine.travel(mock_two_days_before, tick=False): + docket_2 = DocketFactory( + court=self.court, + case_name="SUBPOENAS SERVED CASE OFF", + docket_number="1:21-bk-1250", + source=Docket.RECAP, + ) + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + ) + + # Its related RD is ingested today. + with time_machine.travel(self.mock_date, tick=False): + alert_de_2 = DocketEntryWithParentsFactory( + docket=docket_2, + entry_number=1, + date_filed=datetime.date(2024, 8, 19), + description="MOTION for Leave to File Amicus Curiae Lorem Served", + ) + rd_2 = RECAPDocumentFactory( + docket_entry=alert_de_2, + description="Motion to File Lorem", + document_number="2", + ) + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + ) + + # Run the indexer. + with time_machine.travel(self.mock_date, tick=False): + documents_indexed = index_daily_recap_documents( + self.r, + DocketDocument._index._name, + RECAPSweepDocument, + testing=True, + ) + self.assertEqual( + documents_indexed, 9, msg="Wrong number of documents indexed." + ) + + # Docket and RD created on previous days, will be used later to confirm + # documents got indexed into the sweep index after partial updates. 
+ five_days_before = now() - datetime.timedelta(days=5) + mock_five_days_before = five_days_before.replace(hour=5) + with time_machine.travel( + mock_five_days_before, tick=False + ), self.captureOnCommitCallbacks(execute=True): + docket_old = DocketFactory( + court=self.court, + case_name="SUBPOENAS SERVED LOREM OFF", + docket_number="1:21-bk-1254", + source=Docket.RECAP, + ) + alert_de_old = DocketEntryWithParentsFactory( + docket=docket_old, + entry_number=1, + date_filed=datetime.date(2024, 8, 19), + description="MOTION for Leave to File Amicus Curiae Lorem Served", + ) + rd_old = RECAPDocumentFactory( + docket_entry=alert_de_old, + description="Motion to File", + document_number="1", + is_available=True, + ) + rd_old_2 = RECAPDocumentFactory( + docket_entry=alert_de_old, + description="Motion to File 2", + document_number="2", + is_available=True, + ) + + # Run the indexer. No new documents re_indexed. + with time_machine.travel(self.mock_date, tick=False): + documents_indexed = index_daily_recap_documents( + self.r, + DocketDocument._index._name, + RECAPSweepDocument, + testing=True, + ) + self.assertEqual( + documents_indexed, 9, msg="Wrong number of documents indexed." + ) + + # Update the documents today: + with time_machine.travel( + self.mock_date, tick=False + ), self.captureOnCommitCallbacks(execute=True): + rd_old_2.document_number = 3 + rd_old_2.save() + + # Run the indexer. The updated RD and its parent Docket are re-indexed. + with time_machine.travel(self.mock_date, tick=False): + documents_indexed = index_daily_recap_documents( + self.r, + DocketDocument._index._name, + RECAPSweepDocument, + testing=True, + ) + self.assertEqual( + documents_indexed, 11, msg="Wrong number of documents indexed." + ) + + # Update the Docket today: + with time_machine.travel( + self.mock_date, tick=False + ), self.captureOnCommitCallbacks(execute=True): + docket_old.case_name = "SUBPOENAS SERVED LOREM OFF UPDATED" + docket_old.save() + + # Run the indexer. The Docket update also pulls in its other RD. + with time_machine.travel(self.mock_date, tick=False): + documents_indexed = index_daily_recap_documents( + self.r, + DocketDocument._index._name, + RECAPSweepDocument, + testing=True, + ) + self.assertEqual( + documents_indexed, 12, msg="Wrong number of documents indexed." + ) + + docket_old.delete() + docket.delete() + docket_2.delete() + + def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: + """Test RECAP alerts can be properly filtered out according to + their query and matched-hits conditions. + + - Docket-only Alerts should be triggered only if the Docket was + modified on the day. This prevents sending Alerts due to related + RDs added on the same day which can match the query due to parent + fields indexed into the RDs. + - The Docket or RD shouldn't have triggered the alert previously. + - RECAP-only Alerts should only include RDs that have not triggered the + same alert previously. If there are no hits after filtering RDs, + don't send the alert. + - Cross-object queries should only include RDs that have not triggered + the same alert previously. If there are no hits after filtering RDs, + don't send the alert. + + Assert the content structure accordingly. + """ + + # This docket-only alert matches a Docket ingested today.
+ docket_only_alert = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Docket Only", + query='q="401 Civil"&type=r', + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + self.assertEqual( + len(mail.outbox), 1, msg="Outgoing emails don't match." + ) + html_content = self.get_html_content_from_email(mail.outbox[0]) + self.assertIn(docket_only_alert.name, html_content) + self._confirm_number_of_alerts(html_content, 1) + # The docket-only alert doesn't contain any nested child hits. + self._count_alert_hits_and_child_hits( + html_content, + docket_only_alert.name, + 1, + self.de.docket.case_name, + 0, + ) + + # Assert email text version: + txt_email = mail.outbox[0].body + self.assertIn(docket_only_alert.name, txt_email) + + # The following Docket-only alert shouldn't be triggered when only an + # RD is added today, since its parent Docket was not modified today. + AlertFactory( + user=self.user_profile_2.user, + rate=Alert.REAL_TIME, + name="Test Alert Docket Only Not Triggered", + query='q="405 Civil"&type=r', + ) + # Simulate the docket being ingested a day before. + one_day_before = self.mock_date - datetime.timedelta(days=1) + with time_machine.travel(one_day_before, tick=False): + docket = DocketFactory( + court=self.court, + case_name="SUBPOENAS SERVED CASE", + case_name_full="Jackson & Sons Holdings vs. Bank", + docket_number="1:21-bk-1234", + nature_of_suit="440", + source=Docket.RECAP, + cause="405 Civil", + jurisdiction_type="U.S. Government Defendant", + jury_demand="1,000,000", + ) + + # Its related RD is ingested today. + with time_machine.travel(self.mock_date, tick=False): + alert_de = DocketEntryWithParentsFactory( + docket=docket, + entry_number=1, + date_filed=datetime.date(2024, 8, 19), + description="MOTION for Leave to File Amicus Curiae Lorem Served", + ) + rd = RECAPDocumentFactory( + docket_entry=alert_de, + description="Motion to File", + document_number="1", + is_available=True, + page_count=5, + pacer_doc_id="018036652436", + plain_text="plain text for 018036652436", + ) + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + ) + + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + # The RD ingestion shouldn't match the docket-only alert. + self.assertEqual( + len(mail.outbox), 1, msg="Outgoing emails don't match." + ) + + # Test a RECAP-only alert query. + recap_only_alert = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert RECAP Only", + query='q="plain text for 018036652436"&type=r', + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + # One new alert should be triggered. + self.assertEqual( + len(mail.outbox), 2, msg="Outgoing emails don't match."
+ ) + html_content = self.get_html_content_from_email(mail.outbox[1]) + self._confirm_number_of_alerts(html_content, 1) + # Only one child hit should be included in the case within the alert. + self._count_alert_hits_and_child_hits( + html_content, + recap_only_alert.name, + 1, + alert_de.docket.case_name, + 1, + ) + self._assert_child_hits_content( + html_content, + recap_only_alert.name, + alert_de.docket.case_name, + [rd.description], + ) + # Assert email text version: + txt_email = mail.outbox[1].body + self.assertIn(recap_only_alert.name, txt_email) + self.assertIn(rd.description, txt_email) + + # Trigger the same alert again to confirm that no new alert is + # triggered because previous hits have already triggered the same alert + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + # No new alert should be triggered. + self.assertEqual( + len(mail.outbox), 2, msg="Outgoing emails don't match." + ) + + with time_machine.travel(self.mock_date, tick=False): + # Create a new RD for the same DocketEntry to confirm this new RD is + # properly included in the alert email. + rd_2 = RECAPDocumentFactory( + docket_entry=alert_de, + description="Motion to File 2", + document_number="2", + is_available=True, + page_count=3, + pacer_doc_id="018036652436", + plain_text="plain text for 018036652436", + ) + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + ) + + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # A new alert should be triggered containing only the new RD created. + self.assertEqual( + len(mail.outbox), 3, msg="Outgoing emails don't match." + ) + html_content = self.get_html_content_from_email(mail.outbox[2]) + self._confirm_number_of_alerts(html_content, 1) + self._assert_child_hits_content( + html_content, + recap_only_alert.name, + alert_de.docket.case_name, + [rd_2.description], + ) + + # The following test confirms that hits previously matched with other + # alerts can match a different alert. + recap_only_alert_2 = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert RECAP Only Docket Entry", + query=f"q=docket_entry_id:{alert_de.pk}&type=r", + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # A new alert should be triggered containing two RDs (rd and rd_2) + self.assertEqual( + len(mail.outbox), 4, msg="Outgoing emails don't match." 
+ ) + html_content = self.get_html_content_from_email(mail.outbox[3]) + self._confirm_number_of_alerts(html_content, 1) + self._assert_child_hits_content( + html_content, + recap_only_alert_2.name, + alert_de.docket.case_name, + [rd.description, rd_2.description], + ) + # Assert email text version: + txt_email = mail.outbox[3].body + self.assertIn(recap_only_alert.name, txt_email) + self.assertIn(rd.description, txt_email) + self.assertIn(rd_2.description, txt_email) + + # The following test confirms that a cross-object alert is properly + # matched and triggered + cross_object_alert = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object query", + query=f'q="Motion to File 2"&docket_number={docket.docket_number}&type=r', + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # A new alert should be triggered containing one RD (rd_2) + self.assertEqual( + len(mail.outbox), 5, msg="Outgoing emails don't match." + ) + html_content = self.get_html_content_from_email(mail.outbox[4]) + self._confirm_number_of_alerts(html_content, 1) + self._assert_child_hits_content( + html_content, + cross_object_alert.name, + alert_de.docket.case_name, + [rd_2.description], + ) + # Assert email text version: + txt_email = mail.outbox[4].body + self.assertIn(cross_object_alert.name, txt_email) + self.assertIn(rd_2.description, txt_email) + + docket.delete() + + def test_special_cross_object_alerts_or_clause(self) -> None: + """This test confirms that hits are properly filtered out or included + in alerts for special cross-object alerts that can match either a + Docket-only hit and/or Docket + RDs simultaneously in the same hit. + These cases include queries that use an OR clause combining + Docket field + RD fields. + """ + + # The following test confirms that an alert with a query that can match + # a Docket or RECAPDocuments from different cases simultaneously are + # properly filtered. + cross_object_alert_d_or_rd_field = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object query", + query=f"q=docket_id:{self.de.docket.pk} OR pacer_doc_id:{self.rd_2.pacer_doc_id}&type=r", + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # A new alert should be triggered containing a Docket-only hit and a + # Docket with the nested RD matched. + self.assertEqual( + len(mail.outbox), 1, msg="Outgoing emails don't match." + ) + html_content = self.get_html_content_from_email(mail.outbox[0]) + self._confirm_number_of_alerts(html_content, 1) + + # This hit should only display the Docket matched by its ID, + # no RECAPDocument should be matched. + self._count_alert_hits_and_child_hits( + html_content, + cross_object_alert_d_or_rd_field.name, + 2, + self.de.docket.case_name, + 0, + ) + # The second hit should display the rd_2 nested below its parent docket. 
+ self._assert_child_hits_content( + html_content, + cross_object_alert_d_or_rd_field.name, + self.de_1.docket.case_name, + [self.rd_2.description], + ) + # Assert email text version: + txt_email = mail.outbox[0].body + self.assertIn(cross_object_alert_d_or_rd_field.name, txt_email) + self.assertIn(self.rd_2.description, txt_email) + + # This test confirms that we're able to trigger cross-object alerts + # that include an OR clause and match documents that belong to the + # same case. + cross_object_alert_d_or_rd_field_same_case = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object query", + query=f"q=docket_id:{self.de.docket.pk} OR pacer_doc_id:{self.rd.pacer_doc_id}&type=r", + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # A new alert should be triggered, containing the RD document nested + # below its parent docket. + self.assertEqual( + len(mail.outbox), 2, msg="Outgoing emails don't match." + ) + html_content = self.get_html_content_from_email(mail.outbox[1]) + self._confirm_number_of_alerts(html_content, 1) + self._count_alert_hits_and_child_hits( + html_content, + cross_object_alert_d_or_rd_field.name, + 1, + self.de.docket.case_name, + 1, + ) + self._assert_child_hits_content( + html_content, + cross_object_alert_d_or_rd_field_same_case.name, + self.de.docket.case_name, + [self.rd.description], + ) + + def test_special_cross_object_alerts_text_query(self) -> None: + """This test confirms that hits are properly filtered out or included + in alerts for special cross-object alerts that can match either a + Docket-only hit and/or Docket + RDs simultaneously in the same hit. + These cases include queries that use a text query that can match a + Docket and RD field simultaneously. + """ + + # This test confirms a text query cross-object alert matches documents + # according to trigger conditions like indexed date and previous triggers + # by the same document. + cross_object_alert_text = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object text query", + query=f'q="United states"&type=r', + ) + two_days_before = self.mock_date - datetime.timedelta(days=2) + mock_two_days_before = two_days_before.replace(hour=5) + with time_machine.travel( + mock_two_days_before, tick=False + ), self.captureOnCommitCallbacks(execute=True): + docket = DocketFactory( + court=self.court, + case_name="United States of America", + docket_number="1:21-bk-1009", + source=Docket.RECAP, + ) + + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # No alert should be triggered since the matched docket was not + # modified during the current day. + self.assertEqual( + len(mail.outbox), 0, msg="Outgoing emails don't match." + ) + + # Index new documents that match cross_object_alert_text, an RD, and + # an empty docket. 
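Indexing test data follows a recurring pattern in these tests: freeze the clock so the sweep's date-window logic is deterministic, and execute transaction on-commit callbacks eagerly so the ES indexing they enqueue runs inline. A minimal sketch of the pattern (it runs inside a Django TestCase; mock_date is the frozen time used throughout these tests):

import time_machine

# Freeze "now" and run on-commit hooks (which trigger ES indexing) eagerly.
with time_machine.travel(self.mock_date, tick=False), \
        self.captureOnCommitCallbacks(execute=True):
    ...  # create Dockets / DocketEntries / RECAPDocuments here

The block below applies it to create the docket entry, the RECAPDocument, and the empty docket described above.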
+ with time_machine.travel( + self.mock_date, tick=False + ), self.captureOnCommitCallbacks(execute=True): + alert_de = DocketEntryWithParentsFactory( + docket=docket, + entry_number=1, + date_filed=datetime.date(2024, 8, 19), + description="MOTION for Leave to File", + ) + rd_3 = RECAPDocumentFactory( + docket_entry=alert_de, + description="Motion to File New", + document_number="2", + pacer_doc_id="018036652875", + plain_text="United states Lorem", + ) + + docket_2 = DocketFactory( + court=self.court, + case_name="United States vs Lorem", + docket_number="1:21-bk-1008", + source=Docket.RECAP, + ) + + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # An alert should be triggered containing two hits. One matched by + # the rd_3 plain text description and one matched by docket_2 case_name + self.assertEqual( + len(mail.outbox), 1, msg="Outgoing emails don't match." + ) + html_content = self.get_html_content_from_email(mail.outbox[0]) + self._confirm_number_of_alerts(html_content, 1) + self._count_alert_hits_and_child_hits( + html_content, + cross_object_alert_text.name, + 2, + docket.case_name, + 1, + ) + # rd_3 should appear nested in this hit. + self._assert_child_hits_content( + html_content, + cross_object_alert_text.name, + docket.case_name, + [rd_3.description], + ) + # The docket_2 hit shouldn't contain RDs. + self._assert_child_hits_content( + html_content, + cross_object_alert_text.name, + docket_2.case_name, + [], + ) + # Modify 1:21-bk-1009 docket today: + with time_machine.travel( + self.mock_date, tick=False + ), self.captureOnCommitCallbacks(execute=True): + docket.cause = "405 Civil" + docket.save() + + # Trigger the alert again: + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # A new alert should be triggered containing the docket as a hit with + # no nested RDs. + html_content = self.get_html_content_from_email(mail.outbox[1]) + self.assertEqual( + len(mail.outbox), 2, msg="Outgoing emails don't match." + ) + self._confirm_number_of_alerts(html_content, 1) + self._count_alert_hits_and_child_hits( + html_content, + cross_object_alert_text.name, + 1, + docket.case_name, + 0, + ) + self._assert_child_hits_content( + html_content, + cross_object_alert_text.name, + docket.case_name, + [], + ) + + # Trigger alert again: + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + # No new alerts should be triggered. + self.assertEqual( + len(mail.outbox), 2, msg="Outgoing emails don't match." + ) + + # Index new documents that match cross_object_alert_text, an RD, and + # an empty docket. 
+ with time_machine.travel( + self.mock_date, tick=False + ), self.captureOnCommitCallbacks(execute=True): + rd_4 = RECAPDocumentFactory( + docket_entry=alert_de, + description="Hearing new", + document_number="3", + pacer_doc_id="0180366528790", + plain_text="Lorem ipsum", + ) + rd_5 = RECAPDocumentFactory( + docket_entry=alert_de, + description="Hearing new 2", + document_number="4", + pacer_doc_id="018026657750", + plain_text="United states of america plain text", + ) + + # This test confirms that we're able to trigger cross-object alerts + # that include an OR clause and a cross-object text query. + cross_object_alert_d_or_rd_field_text_query = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object query combined.", + query=f"q=docket_id:{self.de.docket.pk} OR " + f"pacer_doc_id:{self.rd.pacer_doc_id} OR " + f'("United States of America" OR ' + f"pacer_doc_id:{rd_3.pacer_doc_id})&type=r", + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # A new alert should be triggered, containing the RD document nested below + # its parent docket. + html_content = self.get_html_content_from_email(mail.outbox[2]) + self.assertEqual( + len(mail.outbox), 3, msg="Outgoing emails don't match." + ) + # The email contains two alerts: one for cross_object_alert_text + # triggered by the new rd_5 added, and one for cross_object_alert_d_or_rd_field_text_query. + self._confirm_number_of_alerts(html_content, 2) + self._count_alert_hits_and_child_hits( + html_content, + cross_object_alert_text.name, + 1, + docket.case_name, + 1, + ) + # The cross_object_alert_d_or_rd_field_text_query alert contains two + # hits. The first one matches "docket" and rd_3 and rd_5 nested below + # due to the OR clause in the text query, and the second hit matches + # self.de.docket and self.rd. + self._count_alert_hits_and_child_hits( + html_content, + cross_object_alert_d_or_rd_field_text_query.name, + 2, + docket.case_name, + 2, + ) + self._assert_child_hits_content( + html_content, + cross_object_alert_d_or_rd_field_text_query.name, + docket.case_name, + [rd_3.description, rd_5.description], + ) + self._assert_child_hits_content( + html_content, + cross_object_alert_d_or_rd_field_text_query.name, + self.de.docket.case_name, + [self.rd.description], + ) + + # This test confirms that hits are properly filtered when using AND in + # the text query. + cross_object_alert_d_or_rd_field_text_query_and = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object query combined.", + query=f'q=("United States of America" AND ' + f"pacer_doc_id:{rd_3.pacer_doc_id})&type=r", + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # A new alert should be triggered, containing rd_3 document nested below + # its parent docket. + html_content = self.get_html_content_from_email(mail.outbox[3]) + self.assertEqual( + len(mail.outbox), 4, msg="Outgoing emails don't match." 
+ ) + self._confirm_number_of_alerts(html_content, 1) + self._count_alert_hits_and_child_hits( + html_content, + cross_object_alert_d_or_rd_field_text_query_and.name, + 1, + docket.case_name, + 1, + ) + self._assert_child_hits_content( + html_content, + cross_object_alert_d_or_rd_field_text_query_and.name, + docket.case_name, + [rd_3.description], + ) + + docket.delete() + docket_2.delete() + + def test_limit_alert_case_child_hits(self) -> None: + """Test limit case child hits up to 5 and display the "View additional + results for this Case" button. + """ + + with time_machine.travel(self.mock_date, tick=False): + alert_de = DocketEntryWithParentsFactory( + docket=self.de.docket, + entry_number=1, + date_filed=datetime.date(2024, 8, 19), + description="MOTION for Leave to File Amicus Curiae Lorem Served", + ) + rd_descriptions = [] + for i in range(4): + rd = RECAPDocumentFactory( + docket_entry=alert_de, + description=f"Motion to File {i+1}", + document_number=f"{i+1}", + pacer_doc_id=f"018036652436{i+1}", + ) + if i < 3: + # Omit the last alert to compare. Only up to 3 should be + # included in the case. + rd_descriptions.append(rd.description) + + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + ) + recap_only_alert = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert RECAP Only Docket Entry", + query=f"q=docket_entry_id:{alert_de.pk}&type=r", + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ): + call_command("cl_send_recap_alerts", testing_mode=True) + + self.assertEqual( + len(mail.outbox), 1, msg="Outgoing emails don't match." + ) + html_content = self.get_html_content_from_email(mail.outbox[0]) + self.assertIn(recap_only_alert.name, html_content) + self._confirm_number_of_alerts(html_content, 1) + # The case alert should contain up to 3 child hits. + self._count_alert_hits_and_child_hits( + html_content, + recap_only_alert.name, + 1, + self.de.docket.case_name, + 3, + ) + self._assert_child_hits_content( + html_content, + recap_only_alert.name, + alert_de.docket.case_name, + rd_descriptions, + ) + # Assert the View more results button is present in the alert. + self.assertIn("View Additional Results for this Case", html_content) + + # Assert email text version: + txt_email = mail.outbox[0].body + self.assertIn(recap_only_alert.name, txt_email) + for description in rd_descriptions: + with self.subTest( + description=description, msg="Plain text descriptions" + ): + self.assertIn( + description, + txt_email, + msg="RECAPDocument wasn't found in the email content.", + ) + + self.assertIn("View Additional Results for this Case", txt_email) + + alert_de.delete() + + @override_settings(SCHEDULED_ALERT_HITS_LIMIT=3) + def test_multiple_alerts_email_hits_limit_per_alert(self) -> None: + """Test multiple alerts can be grouped in an email and hits within an + alert are limited to SCHEDULED_ALERT_HITS_LIMIT (3) hits. 
+ """ + + docket = DocketFactory( + court=self.court, + case_name=f"SUBPOENAS SERVED CASE", + docket_number=f"1:21-bk-123", + source=Docket.RECAP, + cause="410 Civil", + ) + dockets_created = [] + for i in range(3): + docket_created = DocketFactory( + court=self.court, + case_name=f"SUBPOENAS SERVED CASE {i}", + docket_number=f"1:21-bk-123{i}", + source=Docket.RECAP, + cause="410 Civil", + ) + dockets_created.append(docket_created) + + alert_de = DocketEntryWithParentsFactory( + docket=docket, + entry_number=1, + date_filed=datetime.date(2024, 8, 19), + description="MOTION for Leave to File Amicus Curiae Lorem Served", + ) + rd = RECAPDocumentFactory( + docket_entry=alert_de, + description="Motion to File", + document_number="1", + pacer_doc_id="018036652439", + ) + rd_2 = RECAPDocumentFactory( + docket_entry=alert_de, + description="Motion to File 2", + document_number="2", + pacer_doc_id="018036652440", + plain_text="plain text lorem", + ) + + docket_only_alert = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Docket Only", + query='q="410 Civil"&type=r', + ) + recap_only_alert = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert RECAP Only Docket Entry", + query=f"q=docket_entry_id:{alert_de.pk}&type=r", + ) + cross_object_alert_with_hl = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object", + query=f'q="File Amicus Curiae" AND "Motion to File 2" AND ' + f'"plain text lorem" AND "410 Civil" AND ' + f"id:{rd_2.pk}&docket_number={docket.docket_number}" + f'&case_name="{docket.case_name}"&type=r', + ) + AlertFactory( + user=self.user_profile_2.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object", + query=f'q="File Amicus Curiae" AND "Motion to File 2" AND ' + f'"plain text lorem" AND "410 Civil" AND ' + f"id:{rd_2.pk}&docket_number={docket.docket_number}" + f'&case_name="{docket.case_name}"&type=r', + ) + + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ): + call_command("cl_send_recap_alerts", testing_mode=True) + + self.assertEqual( + len(mail.outbox), 2, msg="Outgoing emails don't match." + ) + + # Assert webhooks. + webhook_events = WebhookEvent.objects.all().values_list( + "content", flat=True + ) + self.assertEqual(len(webhook_events), 3) + + # Assert docket-only alert. + html_content = self.get_html_content_from_email(mail.outbox[0]) + self.assertIn(docket_only_alert.name, html_content) + self._confirm_number_of_alerts(html_content, 3) + # The docket-only alert doesn't contain any nested child hits. + self._count_alert_hits_and_child_hits( + html_content, + docket_only_alert.name, + 3, + docket.case_name, + 0, + ) + self._count_webhook_hits_and_child_hits( + list(webhook_events), + docket_only_alert.name, + 3, + docket.case_name, + 0, + ) + + # Assert RECAP-only alert. + self.assertIn(recap_only_alert.name, html_content) + # The recap-only alert contain 2 child hits. 
+        self._count_alert_hits_and_child_hits(
+            html_content,
+            recap_only_alert.name,
+            1,
+            alert_de.docket.case_name,
+            2,
+        )
+        self._count_webhook_hits_and_child_hits(
+            list(webhook_events),
+            recap_only_alert.name,
+            1,
+            alert_de.docket.case_name,
+            2,
+        )
+        self._assert_child_hits_content(
+            html_content,
+            recap_only_alert.name,
+            alert_de.docket.case_name,
+            [rd.description, rd_2.description],
+        )
+
+        # Assert the cross-object alert.
+        self.assertIn(cross_object_alert_with_hl.name, html_content)
+        # The cross-object alert contains only 1 child hit.
+        self._count_alert_hits_and_child_hits(
+            html_content,
+            cross_object_alert_with_hl.name,
+            1,
+            alert_de.docket.case_name,
+            1,
+        )
+        self._count_webhook_hits_and_child_hits(
+            list(webhook_events),
+            cross_object_alert_with_hl.name,
+            1,
+            alert_de.docket.case_name,
+            1,
+        )
+        self._assert_child_hits_content(
+            html_content,
+            cross_object_alert_with_hl.name,
+            alert_de.docket.case_name,
+            [rd_2.description],
+        )
+
+        # Assert HL in the cross_object_alert_with_hl alert.
+        self.assertIn(f"<strong>{docket.case_name}</strong>", html_content)
+        self.assertEqual(
+            html_content.count(f"<strong>{docket.case_name}</strong>"), 1
+        )
+        self.assertIn(
+            f"<strong>{docket.docket_number}</strong>", html_content
+        )
+        self.assertEqual(
+            html_content.count(f"<strong>{docket.docket_number}</strong>"), 1
+        )
+        self.assertIn(f"<strong>{rd_2.plain_text}</strong>", html_content)
+        self.assertEqual(
+            html_content.count(f"<strong>{rd_2.plain_text}</strong>"), 1
+        )
+        self.assertIn(f"<strong>{rd_2.description}</strong>", html_content)
+        self.assertEqual(
+            html_content.count(f"<strong>{rd_2.description}</strong>"), 1
+        )
+        self.assertIn("<strong>File Amicus Curiae</strong>", html_content)
+        self.assertEqual(
+            html_content.count("<strong>File Amicus Curiae</strong>"), 1
+        )
+
+        # Assert email text version:
+        txt_email = mail.outbox[0].body
+        self.assertIn(recap_only_alert.name, txt_email)
+        self.assertIn(docket_only_alert.name, txt_email)
+        self.assertIn(cross_object_alert_with_hl.name, txt_email)
+        for description in [rd.description, rd_2.description]:
+            with self.subTest(
+                description=description, msg="Plain text descriptions"
+            ):
+                self.assertIn(
+                    description,
+                    txt_email,
+                    msg="RECAPDocument wasn't found in the email content.",
+                )
+
+        docket.delete()
+        for d in dockets_created:
+            d.delete()
+
+    def test_schedule_wly_and_mly_recap_alerts(self) -> None:
+        """Test that Weekly and Monthly RECAP Search Alerts are scheduled
+        daily before being sent later.
+        """
+
+        docket_only_alert = AlertFactory(
+            user=self.user_profile.user,
+            rate=Alert.WEEKLY,
+            name="Test Alert Docket Only",
+            query='q="401 Civil"&type=r',
+        )
+        recap_only_alert = AlertFactory(
+            user=self.user_profile.user,
+            rate=Alert.MONTHLY,
+            name="Test Alert RECAP Only Docket Entry",
+            query=f"q=docket_entry_id:{self.de.pk}&type=r",
+        )
+        cross_object_alert_with_hl = AlertFactory(
+            user=self.user_profile.user,
+            rate=Alert.WEEKLY,
+            name="Test Alert Cross-object",
+            query=f'q="401 Civil" id:{self.rd.pk}&type=r',
+        )
+        with mock.patch(
+            "cl.api.webhooks.requests.post",
+            side_effect=lambda *args, **kwargs: MockResponse(
+                200, mock_raw=True
+            ),
+        ):
+            call_command("cl_send_recap_alerts", testing_mode=True)
+
+        # Weekly and monthly alerts are not sent right away; they are
+        # scheduled as ScheduledAlertHits to be sent later by the
+        # cl_send_scheduled_alerts command.
+        self.assertEqual(
+            len(mail.outbox), 0, msg="Outgoing emails don't match."
+        )
+        schedule_alerts = ScheduledAlertHit.objects.all()
+        self.assertEqual(schedule_alerts.count(), 3)
+
+        # Webhooks are sent immediately as hits are matched.
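For reference, each of those webhook events stores a content payload shaped by the post_content dict in send_search_alert_webhook_es (see cl/api/tasks.py further down this diff); the keys below come from that function, with values elided:

from cl.api.models import WebhookEvent

event = WebhookEvent.objects.first()
assert set(event.content) == {"webhook", "payload"}
assert set(event.content["payload"]) == {"results", "alert"}

The assertion below then checks how many such events were emitted.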
+ webhook_events = WebhookEvent.objects.all().values_list( + "content", flat=True + ) + self.assertEqual(len(webhook_events), 3) + + # Send scheduled Weekly alerts and check assertions. + call_command("cl_send_scheduled_alerts", rate=Alert.WEEKLY) + self.assertEqual( + len(mail.outbox), 1, msg="Outgoing emails don't match." + ) + # Assert docket-only alert. + html_content = self.get_html_content_from_email(mail.outbox[0]) + self._count_alert_hits_and_child_hits( + html_content, + docket_only_alert.name, + 1, + self.de.docket.case_name, + 0, + ) + self._count_alert_hits_and_child_hits( + html_content, + cross_object_alert_with_hl.name, + 1, + self.de.docket.case_name, + 1, + ) + self._assert_child_hits_content( + html_content, + cross_object_alert_with_hl.name, + self.de.docket.case_name, + [self.rd.description], + ) + # Assert email text version: + txt_email = mail.outbox[0].body + self.assertIn(docket_only_alert.name, txt_email) + self.assertIn(cross_object_alert_with_hl.name, txt_email) + self.assertIn(self.rd.description, txt_email) + + # Send scheduled Monthly alerts and check assertions. + current_date = now().replace(day=28, hour=0) + with time_machine.travel(current_date, tick=False): + call_command("cl_send_scheduled_alerts", rate=Alert.MONTHLY) + self.assertEqual( + len(mail.outbox), 2, msg="Outgoing emails don't match." + ) + html_content = self.get_html_content_from_email(mail.outbox[1]) + self._count_alert_hits_and_child_hits( + html_content, + recap_only_alert.name, + 1, + self.de.docket.case_name, + 2, + ) + self._assert_child_hits_content( + html_content, + recap_only_alert.name, + self.de.docket.case_name, + [self.rd.description, self.rd_att.description], + ) + # Assert email text version: + txt_email = mail.outbox[1].body + self.assertIn(recap_only_alert.name, txt_email) + self.assertIn(self.rd.description, txt_email) + self.assertIn(self.rd_att.description, txt_email) + + def test_alert_frequency_estimation(self): + """Test alert frequency ES API endpoint for RECAP Alerts.""" + + search_params = { + "type": SEARCH_TYPES.RECAP, + "q": "Frequency Test RECAP", + } + r = self.client.get( + reverse( + "alert_frequency", kwargs={"version": "4", "day_count": "100"} + ), + search_params, + ) + self.assertEqual(r.json()["count"], 0) + with time_machine.travel( + self.mock_date, tick=False + ), self.captureOnCommitCallbacks(execute=True): + # Docket filed today. + docket = DocketFactory( + court=self.court, + case_name="Frequency Test RECAP", + docket_number="1:21-bk-1240", + source=Docket.RECAP, + date_filed=now().date(), + ) + + # RECAPDocument filed today that belongs to a docket filed outside + # the estimation range. + date_outside_range = now() - datetime.timedelta(days=102) + alert_de = DocketEntryWithParentsFactory( + docket=DocketFactory( + court=self.court, + case_name="Frequency Test RECAP", + docket_number="1:21-bk-1245", + source=Docket.RECAP, + date_filed=date_outside_range.date(), + ), + entry_number=1, + date_filed=now().date(), + ) + RECAPDocumentFactory( + docket_entry=alert_de, + description="Frequency Test RECAP", + document_number="1", + pacer_doc_id="018036652450", + ) + + r = self.client.get( + reverse( + "alert_frequency", kwargs={"version": "4", "day_count": "100"} + ), + search_params, + ) + # 2 expected hits in the last 100 days. One docket filed today + one + # RECAPDocument filed today. 
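In raw URL form, the request above is equivalent to the following sketch, assuming the v4 route prefix that the cl/api/urls.py hunk below enables ("r" is SEARCH_TYPES.RECAP):

resp = self.client.get(
    "/api/rest/v4/alert-frequency/100/",
    {"type": "r", "q": "Frequency Test RECAP"},
)
assert resp.json()["count"] == 2  # 1 Docket + 1 RECAPDocument in the window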
+ self.assertEqual(r.json()["count"], 2) + + docket.delete() + alert_de.docket.delete() diff --git a/cl/alerts/utils.py b/cl/alerts/utils.py index d287b3627a..4b97fc7ba8 100644 --- a/cl/alerts/utils.py +++ b/cl/alerts/utils.py @@ -4,7 +4,8 @@ from django.conf import settings from django.http import QueryDict from elasticsearch_dsl import Q, Search -from elasticsearch_dsl.response import Response +from elasticsearch_dsl.response import Hit, Response +from redis import Redis from cl.alerts.models import ( SCHEDULED_ALERT_HIT_STATUS, @@ -16,7 +17,6 @@ from cl.lib.elasticsearch_utils import add_es_highlighting from cl.search.documents import AudioPercolator from cl.search.models import SEARCH_TYPES, Docket -from cl.users.models import UserProfile @dataclass @@ -25,6 +25,14 @@ class DocketAlertReportObject: docket: Docket +@dataclass +class TaskCompletionStatus: + completed: bool = False + created: int = 0 + total: int = 0 + start_time_millis: int | None = None + + class OldAlertReport: def __init__(self): self.old_alerts = [] @@ -138,3 +146,44 @@ def alert_hits_limit_reached(alert_pk: int, user_pk: int) -> bool: ) return True return False + + +def make_alert_set_key(alert_id: int, document_type: str) -> str: + """Generate a Redis key for storing alert hits. + + :param alert_id: The ID of the alert. + :param document_type: The type of document associated with the alert. + :return: A Redis key string in the format "alert_hits:{alert_id}.{document_type}". + """ + return f"alert_hits:{alert_id}.{document_type}" + + +def add_document_hit_to_alert_set( + r: Redis, alert_id: int, document_type: str, document_id: int +) -> None: + """Add a document ID to the Redis SET associated with an alert ID. + + :param r: Redis client instance. + :param alert_id: The alert identifier. + :param document_type: The type of document associated with the alert. + :param document_id: The docket identifier to add. + :return: None + """ + alert_key = make_alert_set_key(alert_id, document_type) + r.sadd(alert_key, document_id) + + +def has_document_alert_hit_been_triggered( + r: Redis, alert_id: int, document_type: str, document_id: int +) -> bool: + """Check if a document ID is a member of the Redis SET associated with an + alert ID. + + :param r: Redis client instance. + :param alert_id: The alert identifier. + :param document_type: The type of document associated with the alert. + :param document_id: The docket identifier to check. + :return: True if the docket ID is a member of the set, False otherwise. 
+ """ + alert_key = make_alert_set_key(alert_id, document_type) + return r.sismember(alert_key, document_id) diff --git a/cl/api/tasks.py b/cl/api/tasks.py index b70420fe95..7f0b8d2cdd 100644 --- a/cl/api/tasks.py +++ b/cl/api/tasks.py @@ -1,6 +1,8 @@ import json +from collections import defaultdict from typing import Any +from elasticsearch_dsl.response import Hit from rest_framework.renderers import JSONRenderer from cl.alerts.api_serializers import SearchAlertSerializerModel @@ -10,9 +12,12 @@ from cl.api.webhooks import send_webhook_event from cl.celery_init import app from cl.corpus_importer.api_serializers import DocketEntrySerializer -from cl.search.api_serializers import V3OAESResultSerializer +from cl.search.api_serializers import ( + RECAPESResultSerializer, + V3OAESResultSerializer, +) from cl.search.api_utils import ResultObject -from cl.search.models import DocketEntry +from cl.search.models import SEARCH_TYPES, DocketEntry @app.task() @@ -77,6 +82,7 @@ def send_docket_alert_webhook_events( send_webhook_event(webhook_event, json_bytes) +# TODO: Remove after scheduled OA alerts have been processed. @app.task() def send_es_search_alert_webhook( results: list[dict[str, Any]], @@ -116,3 +122,66 @@ def send_es_search_alert_webhook( content=post_content, ) send_webhook_event(webhook_event, json_bytes) + + +@app.task() +def send_search_alert_webhook_es( + results: list[dict[str, Any]] | list[Hit], + webhook_pk: int, + alert_pk: int, +) -> None: + """Send a search alert webhook event containing search results from a + search alert object. + + :param results: The search results returned by SOLR for this alert. + :param webhook_pk: The webhook endpoint ID object to send the event to. + :param alert_pk: The search alert ID. + """ + + webhook = Webhook.objects.get(pk=webhook_pk) + alert = Alert.objects.get(pk=alert_pk) + serialized_alert = SearchAlertSerializerModel(alert).data + match alert.alert_type: + case SEARCH_TYPES.ORAL_ARGUMENT: + es_results = [] + for result in results: + result["snippet"] = result["text"] + es_results.append(ResultObject(initial=result)) + serialized_results = V3OAESResultSerializer( + es_results, many=True + ).data + case SEARCH_TYPES.RECAP: + for result in results: + child_result_objects = [] + if hasattr(result, "child_docs"): + for child_doc in result.child_docs: + child_result_objects.append( + defaultdict( + lambda: None, child_doc["_source"].to_dict() + ) + ) + result["child_docs"] = child_result_objects + serialized_results = RECAPESResultSerializer( + results, many=True + ).data + case _: + # No implemented alert type. 
+ return None + + post_content = { + "webhook": generate_webhook_key_content(webhook), + "payload": { + "results": serialized_results, + "alert": serialized_alert, + }, + } + renderer = JSONRenderer() + json_bytes = renderer.render( + post_content, + accepted_media_type="application/json;", + ) + webhook_event = WebhookEvent.objects.create( + webhook=webhook, + content=post_content, + ) + send_webhook_event(webhook_event, json_bytes) diff --git a/cl/api/urls.py b/cl/api/urls.py index e407d4b94a..c8d1c6a7ea 100644 --- a/cl/api/urls.py +++ b/cl/api/urls.py @@ -322,7 +322,7 @@ name="coverage_data_opinions", ), re_path( - r"^api/rest/v(?P[123])/alert-frequency/(?P\d+)/$", + r"^api/rest/v(?P[1234])/alert-frequency/(?P\d+)/$", views.get_result_count, name="alert_frequency", ), diff --git a/cl/api/views.py b/cl/api/views.py index e0b6987654..29aa0c40b6 100644 --- a/cl/api/views.py +++ b/cl/api/views.py @@ -21,7 +21,11 @@ build_coverage_query, get_solr_interface, ) -from cl.search.documents import AudioDocument, OpinionClusterDocument +from cl.search.documents import ( + AudioDocument, + DocketDocument, + OpinionClusterDocument, +) from cl.search.forms import SearchForm from cl.search.models import SEARCH_TYPES, Citation, Court, OpinionCluster from cl.simple_pages.coverage_utils import build_chart_data @@ -278,7 +282,10 @@ async def get_result_count(request, version, day_count): es_flag_for_o = await sync_to_async(waffle.flag_is_active)( request, "o-es-active" ) - is_es_form = es_flag_for_oa or es_flag_for_o + es_flag_for_r = await sync_to_async(waffle.flag_is_active)( + request, "recap-alerts-active" + ) + is_es_form = es_flag_for_oa or es_flag_for_o or es_flag_for_r search_form = await sync_to_async(SearchForm)( request.GET.copy(), is_es_form=is_es_form ) @@ -303,6 +310,12 @@ async def get_result_count(request, version, day_count): total_query_results = await sync_to_async( do_es_alert_estimation_query )(search_query, cd, day_count) + case SEARCH_TYPES.RECAP if es_flag_for_r: + # Elasticsearch version for RECAP + search_query = DocketDocument.search() + total_query_results = await sync_to_async( + do_es_alert_estimation_query + )(search_query, cd, day_count) case _: @sync_to_async diff --git a/cl/corpus_importer/management/commands/ready_mix_cases_project.py b/cl/corpus_importer/management/commands/ready_mix_cases_project.py index eabb93f4a4..32c9db7ae3 100644 --- a/cl/corpus_importer/management/commands/ready_mix_cases_project.py +++ b/cl/corpus_importer/management/commands/ready_mix_cases_project.py @@ -198,7 +198,8 @@ def query_results_in_es(options): } search_query = DocketDocument.search() - s, _ = build_es_base_query(search_query, cd) + es_queries = build_es_base_query(search_query, cd) + s = es_queries.search_query s = s.extra(size=options["results_size"]) response = s.execute().to_dict() extracted_data = [ diff --git a/cl/custom_filters/templatetags/extras.py b/cl/custom_filters/templatetags/extras.py index a63191495a..39d535b2df 100644 --- a/cl/custom_filters/templatetags/extras.py +++ b/cl/custom_filters/templatetags/extras.py @@ -1,10 +1,13 @@ import random import re +import urllib.parse from datetime import datetime +import waffle from django import template from django.core.exceptions import ValidationError from django.template import Context +from django.template.context import RequestContext from django.template.defaultfilters import date as date_filter from django.utils.formats import date_format from django.utils.html import format_html @@ -12,7 +15,8 @@ from 
django.utils.safestring import SafeString, mark_safe
 from elasticsearch_dsl import AttrDict, AttrList
 
-from cl.search.models import Court, Docket, DocketEntry
+from cl.search.constants import ALERTS_HL_TAG, SEARCH_HL_TAG
+from cl.search.models import SEARCH_TYPES, Court, Docket, DocketEntry
 
 register = template.Library()
 
@@ -130,11 +134,25 @@ def random_int(a: int, b: int) -> int:
 
 
 @register.filter
-def get_attrdict(mapping, key):
-    """Emulates the dictionary get for AttrDict objects. Useful when keys
-    have spaces or other punctuation."""
+def get_es_doc_content(
+    mapping: AttrDict | dict, scheduled_alert: bool = False
+) -> AttrDict | dict | str:
+    """
+    Returns the ES document content placed in the "_source" field if the
+    document is an AttrDict, or simply returns the content when there is no
+    need to extract it from "_source", such as in scheduled alerts where the
+    content is already a dict.
+
+    :param mapping: The AttrDict or dict instance to extract the content from.
+    :param scheduled_alert: A boolean indicating if the content belongs to a
+    scheduled alert, where the content is already in place.
+    :return: The ES document content.
+    """
+
+    if scheduled_alert:
+        return mapping
     try:
-        return mapping[key]
+        return mapping["_source"]
     except KeyError:
         return ""
 
@@ -200,13 +218,15 @@ def citation(obj) -> SafeString:
 
 
 @register.simple_tag
-def contains_highlights(content: str) -> bool:
+def contains_highlights(content: str, alert: bool = False) -> bool:
     """Check if a given string contains the mark tag used in highlights.
 
     :param content: The input string to check.
+    :param alert: Whether this tag is being used in the alert template.
     :return: True if the mark highlight tag is found, otherwise False.
     """
-    pattern = r"<mark>.*?</mark>"
+    hl_tag = ALERTS_HL_TAG if alert else SEARCH_HL_TAG
+    pattern = rf"<{hl_tag}>.*?</{hl_tag}>"
     matches = re.findall(pattern, content)
     return bool(matches)
 
@@ -247,6 +267,35 @@ def get_highlight(result: AttrDict | dict[str, any], field: str) -> any:
 
     return render_string_or_list(hl_value) if hl_value else original_value
 
 
+@register.simple_tag
+def extract_q_value(query: str) -> str:
+    """Extract the value of the "q" parameter from a URL-encoded query string.
+
+    :param query: The URL-encoded query string.
+    :return: The value of the "q" parameter, or an empty string if "q" is not
+    found.
+    """
+
+    parsed_query = urllib.parse.parse_qs(query)
+    return parsed_query.get("q", [""])[0]
+
+
+@register.simple_tag(takes_context=True)
+def alerts_supported(context: RequestContext, search_type: str) -> bool:
+    """Determine if search alerts are supported based on the search type and
+    flag status.
+
+    :param context: The template context, which includes the request, required
+    for the waffle flag.
+    :param search_type: The type of search being performed.
+    :return: True if alerts are supported, False otherwise.
+ """ + + request = context["request"] + if search_type == SEARCH_TYPES.RECAP: + return waffle.flag_is_active(request, "recap-alerts-active") + return search_type in (SEARCH_TYPES.OPINION, SEARCH_TYPES.ORAL_ARGUMENT) + + @register.filter def group_courts(courts: list[Court], num_columns: int) -> list: """Divide courts in equal groupings while keeping related courts together diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 835bd52c36..c12b9c0058 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -35,6 +35,7 @@ ApiPositionMapping, BasePositionMapping, CleanData, + EsMainQueries, ESRangeQueryParams, ) from cl.lib.utils import ( @@ -132,8 +133,8 @@ def build_numeric_range_query( def build_daterange_query( field: str, - before: datetime.date, - after: datetime.date, + before: datetime.date | str, + after: datetime.date | str, relation: Literal["INTERSECTS", "CONTAINS", "WITHIN", None] = None, ) -> list[Range]: """Given field name and date range limits returns ElasticSearch range query or None @@ -148,9 +149,9 @@ def build_daterange_query( params = {} if any([before, after]): - if hasattr(after, "strftime"): + if isinstance(after, datetime.date): params["gte"] = f"{after.isoformat()}T00:00:00Z" - if hasattr(before, "strftime"): + if isinstance(before, datetime.date): params["lte"] = f"{before.isoformat()}T23:59:59Z" if relation is not None: allowed_relations = ["INTERSECTS", "CONTAINS", "WITHIN"] @@ -931,6 +932,7 @@ def build_has_child_query( order_by: tuple[str, str] | None = None, child_highlighting: bool = True, default_current_date: datetime.date | None = None, + alerts: bool = False, ) -> QueryString: """Build a 'has_child' query. @@ -943,6 +945,7 @@ def build_has_child_query( :param child_highlighting: Whether highlighting should be enabled in child docs. :param default_current_date: The default current date to use for computing a stable date score across pagination in the V4 Search API. + :param alerts: If highlighting is being applied to search Alerts hits. :return: The 'has_child' query. """ @@ -959,8 +962,9 @@ def build_has_child_query( default_current_date=default_current_date, ) + hl_tag = ALERTS_HL_TAG if alerts else SEARCH_HL_TAG highlight_options, fields_to_exclude = build_highlights_dict( - highlighting_fields, SEARCH_HL_TAG, child_highlighting + highlighting_fields, hl_tag, child_highlighting ) inner_hits = { @@ -1110,22 +1114,27 @@ def build_es_base_query( cd: CleanData, child_highlighting: bool = True, api_version: Literal["v3", "v4"] | None = None, -) -> tuple[Search, QueryString | None]: + alerts: bool = False, +) -> EsMainQueries: """Builds filters and fulltext_query based on the given cleaned data and returns an elasticsearch query. :param search_query: The Elasticsearch search query object. :param cd: The cleaned data object containing the query and filters. - :param child_highlighting: Whether highlighting should be enabled in child docs. + :param child_highlighting: Whether highlighting should be enabled in child + docs. :param api_version: Optional, the request API version. - :return: A two-tuple, the Elasticsearch search query object and an ES - QueryString for child documents, or None if there is no need to query - child documents. + :param alerts: If highlighting is being applied to search Alerts hits. 
+ :return: An `EsMainQueries` object containing the Elasticsearch search + query object and an ES QueryString for child documents or None if there is + no need to query child documents and a QueryString for parent documents or + None. """ main_query = None string_query = None - join_query = None + child_docs_query = None + parent_query = None filters = [] plain_doc = False match cd["type"]: @@ -1170,12 +1179,14 @@ def build_es_base_query( ], ) ) - main_query, join_query = build_full_join_es_queries( - cd, - child_query_fields, - parent_query_fields, - child_highlighting=child_highlighting, - api_version=api_version, + main_query, child_docs_query, parent_query = ( + build_full_join_es_queries( + cd, + child_query_fields, + parent_query_fields, + child_highlighting=child_highlighting, + api_version=api_version, + ) ) case ( @@ -1206,13 +1217,17 @@ def build_es_base_query( ], ) ) - main_query, join_query = build_full_join_es_queries( - cd, - child_query_fields, - parent_query_fields, - child_highlighting=child_highlighting, - api_version=api_version, + main_query, child_docs_query, parent_query = ( + build_full_join_es_queries( + cd, + child_query_fields, + parent_query_fields, + child_highlighting=child_highlighting, + api_version=api_version, + alerts=alerts, + ) ) + case SEARCH_TYPES.OPINION: str_query = cd.get("q", "") related_match = RELATED_PATTERN.search(str_query) @@ -1222,15 +1237,21 @@ def build_es_base_query( mlt_query = async_to_sync(build_more_like_this_query)( cluster_pks ) - main_query, join_query = build_full_join_es_queries( - cd, - {"opinion": []}, - [], - mlt_query, - child_highlighting=False, - api_version=api_version, + main_query, child_docs_query, parent_query = ( + build_full_join_es_queries( + cd, + {"opinion": []}, + [], + mlt_query, + child_highlighting=False, + api_version=api_version, + ) + ) + return EsMainQueries( + search_query=search_query.query(main_query), + parent_query=parent_query, + child_query=child_docs_query, ) - return search_query.query(main_query), join_query opinion_search_fields = SEARCH_OPINION_QUERY_FIELDS child_fields = opinion_search_fields.copy() @@ -1256,13 +1277,15 @@ def build_es_base_query( ], ) ) - main_query, join_query = build_full_join_es_queries( - cd, - child_query_fields, - parent_query_fields, - mlt_query, - child_highlighting=child_highlighting, - api_version=api_version, + main_query, child_docs_query, parent_query = ( + build_full_join_es_queries( + cd, + child_query_fields, + parent_query_fields, + mlt_query, + child_highlighting=child_highlighting, + api_version=api_version, + ) ) if not any([filters, string_query, main_query]): @@ -1271,7 +1294,11 @@ def build_es_base_query( match_all_query = get_match_all_query( cd, search_query, api_version, child_highlighting ) - return match_all_query, join_query + return EsMainQueries( + search_query=match_all_query, + parent_query=parent_query, + child_query=child_docs_query, + ) if plain_doc: # Combine the filters and string query for plain documents like Oral @@ -1280,7 +1307,11 @@ def build_es_base_query( cd, filters, string_query, api_version ) - return search_query.query(main_query), join_query + return EsMainQueries( + search_query=search_query.query(main_query), + parent_query=parent_query, + child_query=child_docs_query, + ) def build_has_parent_parties_query( @@ -1310,7 +1341,7 @@ def build_has_parent_parties_query( def build_child_docs_query( - join_query: QueryString | None, + child_docs_query: QueryString | None, cd: CleanData, exclude_docs_for_empty_field: str = "", ) -> 
QueryString: @@ -1320,7 +1351,7 @@ def build_child_docs_query( to retrieve child documents directly, such as in the Opinions Feed, RECAP Feed, RECAP Documents count query, and V4 RECAP_DOCUMENT Search API. - :param join_query: Existing Elasticsearch QueryString object or None + :param child_docs_query: Existing Elasticsearch QueryString object or None :param cd: The user input CleanedData :param exclude_docs_for_empty_field: Field that should not be empty for a document to be included @@ -1338,7 +1369,7 @@ def build_child_docs_query( ] parties_has_parent_query = build_has_parent_parties_query(parties_filters) - if not join_query: + if not child_docs_query: # Match all query case. if not exclude_docs_for_empty_field: if cd["type"] == SEARCH_TYPES.OPINION: @@ -1360,7 +1391,7 @@ def build_child_docs_query( filters.append(child_query_recap) return Q("bool", filter=filters) - query_dict = join_query.to_dict() + query_dict = child_docs_query.to_dict() if "filter" in query_dict["bool"]: existing_filter = query_dict["bool"]["filter"] if cd["type"] == SEARCH_TYPES.OPINION: @@ -1422,7 +1453,8 @@ def get_facet_dict_for_search_query( """ cd["just_facets_query"] = True - search_query, _ = build_es_base_query(search_query, cd) + es_queries = build_es_base_query(search_query, cd) + search_query = es_queries.search_query search_query.aggs.bucket("status", A("terms", field="status.raw")) search_query = search_query.extra(size=0) response = search_query.execute() @@ -1444,7 +1476,9 @@ def build_es_main_query( applicable. """ search_query_base = search_query - search_query, join_query = build_es_base_query(search_query, cd) + es_queries = build_es_base_query(search_query, cd) + search_query = es_queries.search_query + child_docs_query = es_queries.child_query top_hits_limit = 5 child_docs_count_query = None match cd["type"]: @@ -1462,7 +1496,9 @@ def build_es_main_query( top_hits_limit, ) case SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS: - child_docs_count_query = build_child_docs_query(join_query, cd) + child_docs_count_query = build_child_docs_query( + child_docs_query, cd + ) if child_docs_count_query: # Get the total RECAP Documents count. child_docs_count_query = search_query_base.query( @@ -2145,7 +2181,7 @@ def fetch_es_results( return [], 0, error, None, None -def build_has_child_filters(cd: CleanData) -> list[QueryString]: +def build_has_child_filters(cd: CleanData) -> list[QueryString | Range]: """Builds Elasticsearch 'has_child' filters based on the given child type and CleanData. 
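The hunk below wires the new entry-date fields into these has_child filters. Roughly, when a bound is provided, the child query gains a range clause built by build_daterange_query; a sketch of the lower-bound-only case, with an illustrative date, following the gte formatting shown earlier in this diff:

import datetime

from elasticsearch_dsl import Q

after = datetime.date(2024, 8, 19)
# What build_daterange_query("entry_date_filed", "", after) roughly yields,
# applied as a filter on RECAPDocument child documents.
range_clause = Q(
    "range", entry_date_filed={"gte": f"{after.isoformat()}T00:00:00Z"}
)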
@@ -2181,6 +2217,8 @@ def build_has_child_filters(cd: CleanData) -> list[QueryString]: description = cd.get("description", "") document_number = cd.get("document_number", "") attachment_number = cd.get("attachment_number", "") + entry_date_filed_after = cd.get("entry_date_filed_after", "") + entry_date_filed_before = cd.get("entry_date_filed_before", "") if available_only: queries_list.extend( @@ -2199,6 +2237,14 @@ def build_has_child_filters(cd: CleanData) -> list[QueryString]: queries_list.extend( build_term_query("attachment_number", attachment_number) ) + if entry_date_filed_after or entry_date_filed_before: + queries_list.extend( + build_daterange_query( + "entry_date_filed", + entry_date_filed_before, + entry_date_filed_after, + ) + ) return queries_list @@ -2358,13 +2404,15 @@ def build_search_feed_query( hl_field = "text" if cd["type"] == SEARCH_TYPES.RECAP: hl_field = "plain_text" - s, join_query = build_es_base_query(search_query, cd) + es_queries = build_es_base_query(search_query, cd) + s = es_queries.search_query + child_docs_query = es_queries.child_query if jurisdiction or cd["type"] == SEARCH_TYPES.RECAP: # An Opinion Jurisdiction feed or RECAP Search displays child documents # Eliminate items that lack the ordering field and apply highlighting # to create a snippet for the plain_text or text fields. s = build_child_docs_query( - join_query, + child_docs_query, cd=cd, exclude_docs_for_empty_field=exclude_docs_for_empty_field, ) @@ -2479,7 +2527,8 @@ def build_full_join_es_queries( mlt_query: Query | None = None, child_highlighting: bool = True, api_version: Literal["v3", "v4"] | None = None, -) -> tuple[QueryString | list, QueryString | None]: + alerts: bool = False, +) -> tuple[QueryString | list, QueryString | None, QueryString | None]: """Build a complete Elasticsearch query with both parent and child document conditions. @@ -2489,7 +2538,9 @@ def build_full_join_es_queries( :param mlt_query: the More Like This Query object. :param child_highlighting: Whether highlighting should be enabled in child docs. :param api_version: Optional, the request API version. - :return: An Elasticsearch QueryString object. + :param alerts: If highlighting is being applied to search Alerts hits. + :return: A three-tuple: the main join query, the child documents query, and + the parent documents query. 
""" q_should = [] @@ -2505,7 +2556,8 @@ def build_full_join_es_queries( case SEARCH_TYPES.PEOPLE: child_type = "position" - join_query = None + child_docs_query = None + parent_query = None if cd["type"] in [ SEARCH_TYPES.RECAP, SEARCH_TYPES.DOCKETS, @@ -2548,7 +2600,7 @@ def build_full_join_es_queries( query for query in parent_filters if not isinstance(query, QueryString) - or query.fields[0] not in ["party", "attorney"] + or query.fields[0] not in ["party", "attorney", "firm"] ] ) if parties_filters: @@ -2562,18 +2614,18 @@ def build_full_join_es_queries( case [], []: pass case [], _: - join_query = Q( + child_docs_query = Q( "bool", should=child_text_query, minimum_should_match=1, ) case _, []: - join_query = Q( + child_docs_query = Q( "bool", filter=child_filters, ) case _, _: - join_query = Q( + child_docs_query = Q( "bool", filter=child_filters, should=child_text_query, @@ -2589,13 +2641,14 @@ def build_full_join_es_queries( (child_highlighting, cd["type"]), {} ) has_child_query = build_has_child_query( - join_query, + child_docs_query, child_type, query_hits_limit, hl_fields, get_function_score_sorting_key(cd, api_version), child_highlighting=child_highlighting, default_current_date=cd.get("request_date"), + alerts=alerts, ) if parties_filters and not has_child_query: @@ -2610,6 +2663,7 @@ def build_full_join_es_queries( SEARCH_RECAP_CHILD_HL_FIELDS, get_function_score_sorting_key(cd, api_version), default_current_date=cd.get("request_date"), + alerts=alerts, ) if has_child_query: @@ -2666,9 +2720,9 @@ def build_full_join_es_queries( q_should.append(parent_query) if not q_should: - return [], join_query + return [], child_docs_query, parent_query - final_query = apply_custom_score_to_main_query( + main_join_query = apply_custom_score_to_main_query( cd, Q( "bool", @@ -2676,10 +2730,7 @@ def build_full_join_es_queries( ), api_version, ) - return ( - final_query, - join_query, - ) + return (main_join_query, child_docs_query, parent_query) def limit_inner_hits( @@ -2916,12 +2967,12 @@ def do_es_api_query( child documents. """ - child_docs_query = None - try: - s, join_query = build_es_base_query( + es_queries = build_es_base_query( search_query, cd, cd["highlight"], api_version ) + s = es_queries.search_query + child_docs_query = es_queries.child_query except ( UnbalancedParenthesesQuery, UnbalancedQuotesQuery, @@ -2938,7 +2989,7 @@ def do_es_api_query( # Note that in V3 Case Law Search, opinions are collapsed by cluster_id # meaning that only one result per cluster is shown. s = build_child_docs_query( - join_query, + child_docs_query, cd=cd, ) main_query = search_query.query(s) @@ -2974,7 +3025,7 @@ def do_es_api_query( ) else: child_docs_query = build_child_docs_query( - join_query, + child_docs_query, cd=cd, ) # Build query params for the ES V4 Search API endpoints. @@ -3076,7 +3127,7 @@ def do_es_alert_estimation_query( """ match cd["type"]: - case SEARCH_TYPES.OPINION: + case SEARCH_TYPES.OPINION | SEARCH_TYPES.RECAP: after_field = "filed_after" before_field = "filed_before" case SEARCH_TYPES.ORAL_ARGUMENT: @@ -3089,11 +3140,123 @@ def do_es_alert_estimation_query( days=int(day_count) ) cd[before_field] = None - estimation_query, _ = build_es_base_query(search_query, cd) + es_queries = build_es_base_query(search_query, cd) + estimation_query = es_queries.search_query + if cd["type"] == SEARCH_TYPES.RECAP: + # The RECAP estimation query consists of two requests: one to estimate + # Docket hits and one to estimate RECAPDocument hits. 
+        del cd[after_field]
+        del cd[before_field]
+        cd["entry_date_filed_after"] = (
+            datetime.date.today() - datetime.timedelta(days=int(day_count))
+        )
+        cd["entry_date_filed_before"] = None
+
+        main_doc_count_query = clean_count_query(estimation_query)
+        main_doc_count_query = main_doc_count_query.extra(
+            size=0, track_total_hits=True
+        )
+
+        # Perform the two queries in a single request.
+        multi_search = MultiSearch()
+        multi_search = multi_search.add(main_doc_count_query)
+
+        # Build the RECAPDocuments count query.
+        es_queries = build_es_base_query(search_query, cd)
+        child_docs_query = es_queries.child_query
+        child_docs_count_query = build_child_docs_query(child_docs_query, cd)
+        child_total = 0
+        if child_docs_count_query:
+            child_docs_count_query = search_query.query(child_docs_count_query)
+            child_total_query = child_docs_count_query.extra(
+                size=0, track_total_hits=True
+            )
+            multi_search = multi_search.add(child_total_query)
+
+        responses = multi_search.execute()
+        parent_total = responses[0].hits.total.value
+        if child_docs_count_query:
+            child_doc_count_response = responses[1]
+            child_total = child_doc_count_response.hits.total.value
+        total_recap_estimation = parent_total + child_total
+        return total_recap_estimation
 
     return estimation_query.count()
 
 
+def do_es_sweep_alert_query(
+    search_query: Search,
+    child_search_query: Search,
+    cd: CleanData,
+) -> tuple[list[Hit] | None, Response | None, Response | None]:
+    """Build an ES query for its use in the daily RECAP sweep index.
+
+    :param search_query: Elasticsearch DSL Search object.
+    :param child_search_query: The Elasticsearch DSL search query to perform
+    the child-only query.
+    :param cd: The query CleanedData
+    :return: A three-tuple: the main query results, the parent document
+    (Docket) results, and the child document (RECAPDocument) results. Elements
+    are None when the form is invalid or the related query is not required.
+ """ + + search_form = SearchForm(cd, is_es_form=True) + if search_form.is_valid(): + cd = search_form.cleaned_data + else: + return None, None, None + es_queries = build_es_base_query(search_query, cd, True, alerts=True) + s = es_queries.search_query + parent_query = es_queries.parent_query + child_query = es_queries.child_query + main_query = add_es_highlighting(s, cd, alerts=True) + main_query = main_query.sort(build_sort_results(cd)) + main_query = main_query.extra( + from_=0, size=settings.SCHEDULED_ALERT_HITS_LIMIT + ) + + multi_search = MultiSearch() + multi_search = multi_search.add(main_query) + if parent_query: + parent_search = search_query.query(parent_query) + parent_search = parent_search.extra( + from_=0, size=settings.SCHEDULED_ALERT_HITS_LIMIT + ) + parent_search = parent_search.source(includes=["docket_id"]) + multi_search = multi_search.add(parent_search) + + if child_query: + child_search = child_search_query.query(child_query) + child_search = child_search.extra( + from_=0, + size=settings.SCHEDULED_ALERT_HITS_LIMIT + * settings.RECAP_CHILD_HITS_PER_RESULT, + ) + child_search = child_search.source(includes=["id"]) + multi_search = multi_search.add(child_search) + + responses = multi_search.execute() + main_results = responses[0] + rd_results = None + docket_results = None + if parent_query: + docket_results = responses[1] + if child_query: + rd_results = responses[2] + + limit_inner_hits({}, main_results, cd["type"]) + set_results_highlights(main_results, cd["type"]) + + for result in main_results: + child_result_objects = [] + if hasattr(result, "child_docs"): + for child_doc in result.child_docs: + child_result_objects.append(child_doc.to_dict()) + result["child_docs"] = child_result_objects + + return main_results, docket_results, rd_results + + def compute_lowest_possible_estimate(precision_threshold: int) -> int: """Estimates can be below reality by as much as 6%. Round numbers below that threshold. :return: The lowest possible estimate. diff --git a/cl/lib/types.py b/cl/lib/types.py index 82d6131b5e..ff257574e9 100644 --- a/cl/lib/types.py +++ b/cl/lib/types.py @@ -4,6 +4,8 @@ from typing import Any, Callable, Dict, List, NotRequired, TypedDict, Union from django.http import HttpRequest +from django_elasticsearch_dsl.search import Search +from elasticsearch_dsl.query import QueryString from cl.users.models import User @@ -190,6 +192,13 @@ def get_db_to_dataclass_map(self): return self.__db_to_dataclass_map +@dataclass +class EsMainQueries: + search_query: Search + parent_query: QueryString | None = None + child_query: QueryString | None = None + + @dataclass class ApiPositionMapping(BasePositionMapping): position_type_dict: defaultdict[int, list[str]] = field( diff --git a/cl/recap/mergers.py b/cl/recap/mergers.py index 6eccba198e..80244e7865 100644 --- a/cl/recap/mergers.py +++ b/cl/recap/mergers.py @@ -1792,6 +1792,15 @@ async def merge_attachment_page_data( if not all(sanity_checks): continue + # Missing on some restricted docs (see Juriscraper) + # Attachment 0 may not have page count since it is the main rd. + if ( + "page_count" in attachment + and attachment["page_count"] is None + and attachment["attachment_number"] != 0 + ): + continue + # Appellate entries with attachments don't have a main RD, transform it # to an attachment. 
In ACMS attachment pages, all the documents use the # same pacer_doc_id, so we need to make sure only one is matched to the diff --git a/cl/search/constants.py b/cl/search/constants.py index a62617f031..333dfbca6c 100644 --- a/cl/search/constants.py +++ b/cl/search/constants.py @@ -160,7 +160,6 @@ "referredTo": 0, "suitNature": 0, } - SEARCH_OPINION_HL_FIELDS = { "caseName": 0, "citation": 0, @@ -307,6 +306,7 @@ Opinion.TRIAL_COURT: "trial-court-document", } + cardinality_query_unique_ids = { SEARCH_TYPES.RECAP: "docket_id", SEARCH_TYPES.DOCKETS: "docket_id", diff --git a/cl/search/documents.py b/cl/search/documents.py index e059b58f22..59d8327875 100644 --- a/cl/search/documents.py +++ b/cl/search/documents.py @@ -1,8 +1,10 @@ from datetime import datetime +from django.conf import settings from django.http import QueryDict from django.utils.html import escape, strip_tags from django_elasticsearch_dsl import Document, fields +from elasticsearch_dsl import Document as DSLDocument from cl.alerts.models import Alert from cl.audio.models import Audio @@ -363,8 +365,8 @@ def prepare_percolator_query(self, instance): cd = search_form.cleaned_data search_query = AudioDocument.search() - query, _ = build_es_base_query(search_query, cd) - return query.to_dict()["query"] + es_queries = build_es_base_query(search_query, cd) + return es_queries.search_query.to_dict()["query"] class ES_CHILD_ID: @@ -960,8 +962,7 @@ def prepare_timestamp(self, instance): return datetime.utcnow() -@recap_index.document -class ESRECAPDocument(DocketBaseDocument): +class ESRECAPBaseDocument(DSLDocument): id = fields.IntegerField(attr="pk") docket_entry_id = fields.IntegerField(attr="docket_entry.pk") description = fields.TextField( @@ -1029,6 +1030,10 @@ class ESRECAPDocument(DocketBaseDocument): fields.IntegerField(multi=True), ) + +@recap_index.document +class ESRECAPDocument(DocketBaseDocument, ESRECAPBaseDocument): + class Django: model = RECAPDocument ignore_signals = True @@ -1834,3 +1839,24 @@ def prepare_non_participating_judge_ids(self, instance): def prepare_cluster_child(self, instance): return "opinion_cluster" + + +class RECAPSweepDocument(DocketDocument, ESRECAPDocument): + class Index: + name = "recap_sweep" + settings = { + "number_of_shards": settings.ELASTICSEARCH_RECAP_NUMBER_OF_SHARDS, + "number_of_replicas": settings.ELASTICSEARCH_RECAP_NUMBER_OF_REPLICAS, + "analysis": settings.ELASTICSEARCH_DSL["analysis"], + } + + +class ESRECAPSweepDocument(ESRECAPBaseDocument): + + class Index: + name = "recap_document_sweep" + settings = { + "number_of_shards": settings.ELASTICSEARCH_RECAP_NUMBER_OF_SHARDS, + "number_of_replicas": settings.ELASTICSEARCH_RECAP_NUMBER_OF_REPLICAS, + "analysis": settings.ELASTICSEARCH_DSL["analysis"], + } diff --git a/cl/search/forms.py b/cl/search/forms.py index 7ff40a7f3e..7dcd91eaeb 100644 --- a/cl/search/forms.py +++ b/cl/search/forms.py @@ -221,6 +221,16 @@ class SearchForm(forms.Form): ), ) atty_name.as_str_types = [SEARCH_TYPES.RECAP] + firm_name = forms.CharField( + required=False, + label="Firm Name", + widget=forms.TextInput( + attrs={ + "class": "external-input form-control", + "autocomplete": "off", + }, + ), + ) # # Oral argument fields diff --git a/cl/search/management/commands/cl_index_parent_and_child_docs.py b/cl/search/management/commands/cl_index_parent_and_child_docs.py index abb528a3b0..366d9fe13e 100644 --- a/cl/search/management/commands/cl_index_parent_and_child_docs.py +++ b/cl/search/management/commands/cl_index_parent_and_child_docs.py @@ -526,7 +526,9 @@ 
diff --git a/cl/search/management/commands/cl_index_parent_and_child_docs.py b/cl/search/management/commands/cl_index_parent_and_child_docs.py
index abb528a3b0..366d9fe13e 100644
--- a/cl/search/management/commands/cl_index_parent_and_child_docs.py
+++ b/cl/search/management/commands/cl_index_parent_and_child_docs.py
@@ -526,7 +526,9 @@ def process_queryset(
         match task_to_use:
             case "index_parent_and_child_docs":
                 index_parent_and_child_docs.si(
-                    chunk, search_type, testing_mode=testing_mode
+                    chunk,
+                    search_type,
+                    testing_mode=testing_mode,
                 ).set(queue=queue).apply_async()
 
             case "index_parent_or_child_docs":
diff --git a/cl/search/tasks.py b/cl/search/tasks.py
index c5daeb5073..fa6bf4fdaf 100644
--- a/cl/search/tasks.py
+++ b/cl/search/tasks.py
@@ -477,6 +477,13 @@ def document_fields_to_update(
                 continue
             field_value = prepare_method(main_instance)
             fields_to_update[field] = field_value
+
+    if fields_to_update:
+        # If there are fields to update, refresh the timestamp field too.
+        prepare_timestamp = getattr(es_document(), "prepare_timestamp", None)
+        if prepare_timestamp:
+            field_value = prepare_timestamp(main_instance)
+            fields_to_update["timestamp"] = field_value
 
     return fields_to_update
@@ -777,9 +784,18 @@ def update_children_docs_by_query(
     # Build the UpdateByQuery script and execute it
     script_lines = []
     params = {}
+    if fields_to_update:
+        # If there are fields to update, include the timestamp field too.
+        fields_to_update.append("timestamp")
     for field_to_update in fields_to_update:
         field_list = (
-            fields_map[field_to_update] if fields_map else [field_to_update]
+            ["timestamp"]
+            if field_to_update == "timestamp"
+            else (
+                fields_map[field_to_update]
+                if fields_map
+                else [field_to_update]
+            )
        )
         for field_name in field_list:
             script_lines.append(
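With the two tasks.py hunks above, every parent update also stamps a fresh `timestamp` onto the children. A rough sketch of that UpdateByQuery script-building pattern with elasticsearch-dsl; the helper name, index name, and `parent_id` query below are illustrative assumptions, not the project's actual task:

```python
from elasticsearch_dsl import UpdateByQuery, connections

connections.create_connection(hosts=["http://localhost:9200"])  # assumed host


def update_child_fields(parent_id: int, fields_to_update: list[str], values: dict):
    """Illustrative helper: update child docs of one parent document."""
    if fields_to_update:
        # Mirror the change above: always refresh the timestamp too.
        fields_to_update = [*fields_to_update, "timestamp"]
    # One painless statement per field, with the values passed as params.
    script_source = " ".join(
        f"ctx._source.{field} = params.{field};" for field in fields_to_update
    )
    ubq = (
        UpdateByQuery(index="recap")
        .query("parent_id", type="recap_document", id=parent_id)
        .script(source=script_source, params=values)
    )
    return ubq.execute()
```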
diff --git a/cl/search/templates/feeds/solr_desc_template.html b/cl/search/templates/feeds/solr_desc_template.html
index ce02928003..479d9fe4f5 100644
--- a/cl/search/templates/feeds/solr_desc_template.html
+++ b/cl/search/templates/feeds/solr_desc_template.html
@@ -7,7 +7,7 @@
 {% else %}
   {% flag "o-es-active" %}
     {% if doc0.child_docs %}
-      {% with doc=doc0.child_docs.0|get_attrdict:"_source" %}
+      {% with doc=doc0.child_docs.0|get_es_doc_content %}
{{ doc.text|render_string_or_list|safe|truncatewords:"500" }}


       {% endwith %}
     {% else %}
diff --git a/cl/search/templates/includes/pa_search_result.html b/cl/search/templates/includes/pa_search_result.html
index feecdd1003..f290514af8 100644
--- a/cl/search/templates/includes/pa_search_result.html
+++ b/cl/search/templates/includes/pa_search_result.html
@@ -4,7 +4,7 @@
 {% load humanize %}
 
 {% for result in results.object_list %}
-  {% with opinion=result.grouped_by_opinion_cluster_id.hits.hits.0|get_attrdict:"_source" %}
+  {% with opinion=result.grouped_by_opinion_cluster_id.hits.hits.0|get_es_doc_content %}

@@ -30,7 +30,7 @@

{% for parenthetical_group in result.grouped_by_opinion_cluster_id.hits.hits %}
-    {% with pa_group=parenthetical_group|get_attrdict:"_source" %}
+    {% with pa_group=parenthetical_group|get_es_doc_content %}

       {{ pa_group.representative_text|safe }}
diff --git a/cl/search/templates/includes/search_result.html b/cl/search/templates/includes/search_result.html
index db862a36aa..62514e70f1 100644
--- a/cl/search/templates/includes/search_result.html
+++ b/cl/search/templates/includes/search_result.html
@@ -163,7 +163,7 @@

   {% endif %}
   {% for doc in result.child_docs %}
-    {% with doc=doc|get_attrdict:"_source" %}
+    {% with doc=doc|get_es_doc_content %}

       {% if doc.short_description %}{{ doc.short_description|render_string_or_list|safe }} — {% endif %}Document #{% if doc.document_number %}{{ doc.document_number }}{% endif %}{% if doc.attachment_number %}, Attachment #{{ doc.attachment_number }}{% endif %}
@@ -414,7 +414,7 @@

   {% if type == SEARCH_TYPES.OPINION or type_override == SEARCH_TYPES.OPINION and simple == False %}
     {% for doc in result.child_docs %}
-      {% with doc=doc|get_attrdict:"_source" %}
+      {% with doc=doc|get_es_doc_content %}
{% if result.child_docs|length > 1 or doc.type != 'combined-opinion' %}

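All of the template hunks in this group swap the generic `get_attrdict:"_source"` lookup for a purpose-built `get_es_doc_content` filter. A hedged sketch of what such a filter might look like; the project's actual filter lives in its template tag library and likely handles more input shapes:

```python
from django import template

register = template.Library()


@register.filter
def get_es_doc_content(hit):
    """Unwrap an Elasticsearch hit into its _source document content.

    Sketch only: accepts a raw hit mapping ({"_source": {...}}) and falls
    back to returning the value unchanged when it is already unwrapped.
    """
    try:
        return hit["_source"]
    except (KeyError, TypeError):
        return hit
```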
diff --git a/cl/search/templates/search.html b/cl/search/templates/search.html
index 5c72181168..0eced26d1c 100644
--- a/cl/search/templates/search.html
+++ b/cl/search/templates/search.html
@@ -1,6 +1,7 @@
 {% extends 'base.html' %}
 {% load humanize %}
 {% load text_filters %}
+{% load extras %}
 {% load static %}
 {% load waffle_tags %}
 
@@ -177,7 +178,8 @@
-          {% if search_form.type.value == SEARCH_TYPES.OPINION or search_form.type.value == SEARCH_TYPES.ORAL_ARGUMENT %}
+          {% alerts_supported search_form.type.value as search_alerts_supported %}
+          {% if search_alerts_supported %}
             {% include "includes/alert_modal.html" %}
           {% endif %}
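Rather than enumerating alert-capable search types inline, the template now asks an `alerts_supported` tag, keeping that list in one place. A plausible sketch, assuming the tag simply membership-tests the type; the real tag ships with this change in the `extras` tag library and may consult settings instead:

```python
from django import template

from cl.search.models import SEARCH_TYPES

register = template.Library()


@register.simple_tag
def alerts_supported(search_type: str) -> bool:
    # With RECAP alerts added by this change, three types support alerts.
    return search_type in (
        SEARCH_TYPES.OPINION,
        SEARCH_TYPES.ORAL_ARGUMENT,
        SEARCH_TYPES.RECAP,
    )
```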
@@ ... @@
-          {% if search_form.type.value == SEARCH_TYPES.OPINION or search_form.type.value == SEARCH_TYPES.ORAL_ARGUMENT %}
+          {% alerts_supported search_form.type.value as search_alerts_supported %}
+          {% if search_alerts_supported %}
             {% if not error and get_string %}
diff --git a/cl/search/tests/tests_es_opinion.py b/cl/search/tests/tests_es_opinion.py
@@ ... @@ def test_extract_snippet_from_db_highlight_disabled(self) -> None:
         prioritizing the different text fields available in the content when
         highlighting is disabled."""
-        with self.captureOnCommitCallbacks(execute=True):
-
+        with time_machine.travel(
+            self.mock_date, tick=False
+        ), self.captureOnCommitCallbacks(execute=True):
             c_2_opinion_1 = OpinionFactory.create(
                 extracted_by_ocr=True,
                 author=self.person_2,
@@ -641,7 +642,6 @@ def test_extract_snippet_from_db_highlight_disabled(self) -> None:
                 html_lawbox="html_lawbox & text from DB",
                 cluster=self.opinion_cluster_2,
             )
-
             c_2_opinion_2 = OpinionFactory.create(
                 extracted_by_ocr=True,
                 author=self.person_2,
@@ -716,7 +716,9 @@ def test_extract_snippet_from_db_highlight_disabled(self) -> None:
             )
             self.assertEqual(expected_text, result_opinion["snippet"])
 
-        with self.captureOnCommitCallbacks(execute=True):
+        with time_machine.travel(
+            self.mock_date, tick=False
+        ), self.captureOnCommitCallbacks(execute=True):
             c_2_opinion_1.delete()
             c_2_opinion_2.delete()
             c_2_opinion_3.delete()
diff --git a/cl/search/tests/tests_es_oral_arguments.py b/cl/search/tests/tests_es_oral_arguments.py
index 0b7d6089fd..5878299362 100644
--- a/cl/search/tests/tests_es_oral_arguments.py
+++ b/cl/search/tests/tests_es_oral_arguments.py
@@ -984,8 +984,9 @@ def confirm_query_matched(response, query_id) -> bool:
     @staticmethod
     def save_percolator_query(cd):
         search_query = AudioDocument.search()
-        query, _ = build_es_base_query(search_query, cd)
-        query_dict = query.to_dict()["query"]
+        es_queries = build_es_base_query(search_query, cd)
+        search_query = es_queries.search_query
+        query_dict = search_query.to_dict()["query"]
         percolator_query = AudioPercolator(
             percolator_query=query_dict, rate=Alert.REAL_TIME
         )
diff --git a/cl/search/tests/tests_es_person.py b/cl/search/tests/tests_es_person.py
index 5012c058d1..12a0b7f1fb 100644
--- a/cl/search/tests/tests_es_person.py
+++ b/cl/search/tests/tests_es_person.py
@@ -1342,7 +1342,8 @@ def test_has_child_filters(self) -> None:
             "type": SEARCH_TYPES.PEOPLE,
         }
         s = PersonDocument.search()
-        main_query, _ = build_es_base_query(s, cd)
+        es_queries = build_es_base_query(s, cd)
+        main_query = es_queries.search_query
         self.assertEqual(main_query.count(), 2)
 
         # Query by parent field dob_state and child field selection_method.
@@ -1352,7 +1353,8 @@ def test_has_child_filters(self) -> None:
             "type": SEARCH_TYPES.PEOPLE,
         }
         s = PersonDocument.search()
-        main_query, _ = build_es_base_query(s, cd)
+        es_queries = build_es_base_query(s, cd)
+        main_query = es_queries.search_query
         self.assertEqual(main_query.count(), 1)
 
         position_5.delete()
diff --git a/cl/tests/cases.py b/cl/tests/cases.py
index 83aa93e4b5..6a24763f97 100644
--- a/cl/tests/cases.py
+++ b/cl/tests/cases.py
@@ -10,6 +10,7 @@
 from django_elasticsearch_dsl.registries import registry
 from lxml import etree, html
 from rest_framework.test import APITestCase
+from rest_framework.utils.serializer_helpers import ReturnList
 
 from cl.lib.redis_utils import get_redis_interface
 from cl.search.models import SEARCH_TYPES
@@ -277,7 +278,10 @@ async def _test_api_fields_content(
             get_expected_value,
         ) in fields_to_compare.items():
             with self.subTest(field=field):
-                parent_document = api_response.data["results"][0]
+                if isinstance(api_response, ReturnList):
+                    parent_document = api_response[0]
+                else:
+                    parent_document = api_response.data["results"][0]
                 actual_value = parent_document.get(field)
                 if field in ["recap_documents", "opinions", "positions"]:
                     child_document = actual_value[0]
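The cases.py branch above accommodates two DRF response shapes: unpaginated output arrives as a bare `ReturnList` of serialized objects, while paginated responses wrap results in a dict. A small sketch of the distinction, with an illustrative helper name:

```python
from rest_framework.utils.serializer_helpers import ReturnList


def first_result(api_response):
    """Illustrative helper mirroring the branch added above."""
    if isinstance(api_response, ReturnList):
        # Unpaginated: the response itself is the list of objects.
        return api_response[0]
    # Paginated: objects live under data["results"].
    return api_response.data["results"][0]
```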