diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py
new file mode 100644
index 0000000000..a38ff91f8c
--- /dev/null
+++ b/cl/alerts/management/commands/cl_send_recap_alerts.py
@@ -0,0 +1,730 @@
+import copy
+import datetime
+import time
+import traceback
+from typing import Any, Literal
+
+import pytz
+from asgiref.sync import async_to_sync
+from django.contrib.auth.models import User
+from django.http import QueryDict
+from django.utils import timezone
+from elasticsearch import Elasticsearch
+from elasticsearch.exceptions import RequestError, TransportError
+from elasticsearch_dsl import connections
+from elasticsearch_dsl.response import Hit, Response
+from elasticsearch_dsl.utils import AttrList
+from redis import Redis
+
+from cl.alerts.models import Alert, ScheduledAlertHit
+from cl.alerts.tasks import send_search_alert_emails
+from cl.alerts.utils import (
+ TaskCompletionStatus,
+ add_document_hit_to_alert_set,
+ alert_hits_limit_reached,
+ has_document_alert_hit_been_triggered,
+)
+from cl.api.models import WebhookEventType
+from cl.api.tasks import send_search_alert_webhook_es
+from cl.lib.command_utils import VerboseCommand, logger
+from cl.lib.date_time import dt_as_local_date
+from cl.lib.elasticsearch_utils import do_es_sweep_alert_query
+from cl.lib.redis_utils import get_redis_interface
+from cl.search.documents import (
+ DocketDocument,
+ ESRECAPSweepDocument,
+ RECAPSweepDocument,
+)
+from cl.search.exception import (
+ BadProximityQuery,
+ UnbalancedParenthesesQuery,
+ UnbalancedQuotesQuery,
+)
+from cl.search.models import SEARCH_TYPES, Docket
+from cl.stats.utils import tally_stat
+
+
+def get_task_status(task_id: str, es: Elasticsearch) -> dict[str, Any]:
+ """Fetch the status of a task from Elasticsearch.
+
+ :param task_id: The ID of the task to fetch the status for.
+ :param es: The Elasticsearch client instance.
+ :return: The status of the task if successful, or an empty dictionary if
+ an error occurs.
+ """
+ try:
+ return es.tasks.get(task_id=task_id)
+ except (
+ TransportError,
+ ConnectionError,
+ RequestError,
+ ) as e:
+ logger.error("Error getting sweep alert index task status: %s", e)
+ return {}
+
+
+def compute_estimated_remaining_time(
+ initial_wait: float, task_status: TaskCompletionStatus
+) -> float:
+ """Compute the estimated remaining time for the re_index task to complete.
+
+ :param initial_wait: The default wait time in seconds.
+ :param task_status: An instance of `TaskCompletionStatus` containing task
+ information.
+ :return: The estimated remaining time in seconds. If the start time,
+ created, or total are invalid, the initial default time is returned.
+ """
+
+ if (
+ task_status.start_time_millis is None
+ or not task_status.created
+ or not task_status.total
+ ):
+ return initial_wait
+
+ start_time = datetime.datetime.fromtimestamp(
+ task_status.start_time_millis / 1000.0
+ )
+ time_now = datetime.datetime.now()
+ estimated_time_remaining = max(
+ datetime.timedelta(
+ seconds=(
+ (time_now - start_time).total_seconds() / task_status.created
+ )
+ * (task_status.total - task_status.created)
+ ).total_seconds(),
+ initial_wait,
+ )
+
+ return estimated_time_remaining
+
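To make the estimate above concrete: the function scales elapsed time per created document by the number of documents still pending, then clamps the result to at least the initial wait. A minimal worked example of the same arithmetic (plain Python, illustrative only):

    # Suppose the reindex task started 120 seconds ago and has copied
    # 4,000 of 10,000 documents.
    elapsed = 120.0                          # seconds since start_time_millis
    created = 4_000
    total = 10_000
    per_doc = elapsed / created              # 0.03 s per document
    remaining = per_doc * (total - created)  # 0.03 * 6,000 = 180 s
    assert remaining == 180.0
    # compute_estimated_remaining_time then returns max(remaining, initial_wait).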
+
+def retrieve_task_info(task_info: dict[str, Any]) -> TaskCompletionStatus:
+ """Retrieve task information from the given task dict.
+
+ :param task_info: A dictionary containing the task status information.
+ :return: A `TaskCompletionStatus` object representing the extracted task
+ information.
+ """
+
+ if task_info:
+ status = task_info["task"]["status"]
+ return TaskCompletionStatus(
+ completed=task_info["completed"],
+ created=status["created"],
+ total=status["total"],
+ start_time_millis=task_info["task"]["start_time_in_millis"],
+ )
+ return TaskCompletionStatus()
+
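For reference, retrieve_task_info reads only four fields from the Elasticsearch tasks API response. A trimmed, illustrative payload (values made up):

    sample_task_info = {
        "completed": False,
        "task": {
            "start_time_in_millis": 1718000000000,
            "status": {"created": 4_000, "total": 10_000},
        },
    }
    status = retrieve_task_info(sample_task_info)
    # status.created -> 4000, status.total -> 10000, status.completed -> False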
+
+def index_daily_recap_documents(
+ r: Redis,
+ source_index_name: str,
+ target_index: type[RECAPSweepDocument] | type[ESRECAPSweepDocument],
+ testing: bool = False,
+ only_rd: bool = False,
+) -> int:
+ """Index Dockets added/modified during the day and all their RECAPDocuments
+ and RECAPDocuments added/modified during the day and their parent Dockets.
+ It uses the ES re_index API,
+
+ :param r: Redis client instance.
+ :param source_index_name: The source Elasticsearch index name from which
+ documents will be queried.
+ :param target_index: The target Elasticsearch index to which documents will
+ be re-indexed.
+ :param testing: Boolean flag for testing mode.
+ :param only_rd: Whether to reindex only RECAPDocuments into the
+ ESRECAPSweepDocument index.
+ :return: The total number of documents re-indexed.
+ """
+
+ if r.exists("alert_sweep:main_re_index_completed"):
+ logger.info(
+ "The main re-index task has been completed and will be omitted."
+ )
+ # The main re-indexing has been completed for the day. Skip it and
+ # proceed with the RECAPDocument re-index.
+ return 0
+
+ if r.exists("alert_sweep:rd_re_index_completed"):
+ logger.info(
+ "The RECAPDocument only re-index task has been completed and will "
+ "be omitted."
+ )
+ # The RECAPDocument re-indexing has been completed for the day. Skip
+ # it and proceed with sending alerts.
+ return 0
+
+ if not r.exists("alert_sweep:query_date"):
+ # In case of a failure, store the date when alerts should be queried in
+ # Redis, so the command can be resumed.
+ local_now = timezone.localtime().replace(tzinfo=None)
+ local_midnight = local_now.replace(
+ hour=0, minute=0, second=0, microsecond=0
+ )
+ r.set("alert_sweep:query_date", local_midnight.isoformat())
+
+ else:
+ # If "alert_sweep:query_date" already exists get it from Redis.
+ local_midnight_str: str = str(r.get("alert_sweep:query_date"))
+ local_midnight = datetime.datetime.fromisoformat(local_midnight_str)
+ logger.info(f"Resuming re-indexing process for date: {local_midnight}")
+
+ es = connections.get_connection()
+ # Convert the local (PDT) midnight time to UTC
+ local_timezone = pytz.timezone(timezone.get_current_timezone_name())
+ local_midnight_localized = local_timezone.localize(local_midnight)
+ local_midnight_utc = local_midnight_localized.astimezone(pytz.utc)
+ next_day_utc = local_midnight_utc + datetime.timedelta(days=1)
+
+ today_datetime_iso = local_midnight_utc.isoformat().replace("+00:00", "Z")
+ next_day_utc_iso = next_day_utc.isoformat().replace("+00:00", "Z")
+ # Reindex API query.
+ query = (
+ {
+ "bool": {
+ "should": [
+ # Dockets added/modified today
+ {
+ "bool": {
+ "must": [
+ {
+ "range": {
+ "timestamp": {
+ "gte": today_datetime_iso,
+ "lt": next_day_utc_iso,
+ }
+ }
+ },
+ {"term": {"docket_child": "docket"}},
+ ]
+ }
+ },
+ # RECAPDocuments with parents added/modified today
+ {
+ "has_parent": {
+ "parent_type": "docket",
+ "query": {
+ "range": {
+ "timestamp": {
+ "gte": today_datetime_iso,
+ "lt": next_day_utc_iso,
+ }
+ }
+ },
+ }
+ },
+ # RECAPDocuments added/modified today
+ {
+ "bool": {
+ "must": [
+ {
+ "range": {
+ "timestamp": {
+ "gte": today_datetime_iso,
+ "lt": next_day_utc_iso,
+ }
+ }
+ },
+ {"term": {"docket_child": "recap_document"}},
+ ]
+ }
+ },
+ # Dockets that are parents of RECAPDocuments added/modified today
+ {
+ "has_child": {
+ "type": "recap_document",
+ "query": {
+ "range": {
+ "timestamp": {
+ "gte": today_datetime_iso,
+ "lt": next_day_utc_iso,
+ }
+ }
+ },
+ }
+ },
+ ]
+ }
+ }
+ if not only_rd
+ else {
+ "bool": {
+ "should": [
+ # RECAPDocuments with parents added/modified today
+ {
+ "has_parent": {
+ "parent_type": "docket",
+ "query": {
+ "range": {
+ "timestamp": {
+ "gte": today_datetime_iso,
+ "lt": next_day_utc_iso,
+ }
+ }
+ },
+ }
+ },
+ # RECAPDocuments added/modified today
+ {
+ "bool": {
+ "must": [
+ {
+ "range": {
+ "timestamp": {
+ "gte": today_datetime_iso,
+ "lt": next_day_utc_iso,
+ }
+ }
+ },
+ {"term": {"docket_child": "recap_document"}},
+ ]
+ }
+ },
+ ]
+ }
+ }
+ )
+
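The raw dict above is what the reindex API consumes directly. For readability, the non-only_rd branch is equivalent to this elasticsearch_dsl sketch (illustrative; sweep_query.to_dict() would produce the same body):

    from elasticsearch_dsl import Q

    ts_range = Q(
        "range",
        timestamp={"gte": today_datetime_iso, "lt": next_day_utc_iso},
    )
    sweep_query = Q(
        "bool",
        should=[
            # Dockets added/modified today.
            ts_range & Q("term", docket_child="docket"),
            # RECAPDocuments whose parent docket changed today.
            Q("has_parent", parent_type="docket", query=ts_range),
            # RECAPDocuments added/modified today.
            ts_range & Q("term", docket_child="recap_document"),
            # Dockets that are parents of RECAPDocuments changed today.
            Q("has_child", type="recap_document", query=ts_range),
        ],
    )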
+ if not r.exists("alert_sweep:task_id"):
+ # Remove the index from the previous day and create a new one.
+ target_index._index.delete(ignore=404)
+ target_index.init()
+ target_index_name = target_index._index._name
+
+ # In case of a failure, store the task_id in Redis so the command
+ # can be resumed.
+ params = {
+ "source": {"index": source_index_name, "query": query},
+ "dest": {"index": target_index_name},
+ "wait_for_completion": False,
+ "refresh": True,
+ }
+ if only_rd:
+ # Re-index only RECAPDocument fields into the ESRECAPSweepDocument
+ # index.
+ params["script"] = {
+ "source": """
+ def fields = [
+ 'id',
+ 'docket_entry_id',
+ 'description',
+ 'entry_number',
+ 'entry_date_filed',
+ 'short_description',
+ 'document_type',
+ 'document_number',
+ 'pacer_doc_id',
+ 'plain_text',
+ 'attachment_number',
+ 'is_available',
+ 'page_count',
+ 'filepath_local',
+ 'absolute_url',
+ 'cites'
+ ];
+ ctx._source.keySet().retainAll(fields);
+ """
+ }
+ response = es.reindex(**params)
+ # Store the task ID in Redis
+ task_id = response["task"]
+ r.set("alert_sweep:task_id", task_id)
+ logger.info(f"Re-indexing task scheduled ID: {task_id}")
+ else:
+ task_id = r.get("alert_sweep:task_id")
+ logger.info(f"Resuming re-index task ID: {task_id}")
+
+ initial_wait = 0.01 if testing else 60.0
+ time.sleep(initial_wait)
+ task_info = retrieve_task_info(get_task_status(task_id, es))
+ iterations_count = 0
+ estimated_time_remaining = compute_estimated_remaining_time(
+ initial_wait, task_info
+ )
+ while not task_info.completed:
+ logger.info(
+ f"Task progress: {task_info.created}/{task_info.total} documents. "
+ f"Estimated time to finish: {estimated_time_remaining} seconds."
+ )
+ task_status = get_task_status(task_id, es)
+ task_info = retrieve_task_info(task_status)
+ time.sleep(estimated_time_remaining)
+ if task_info and not task_info.completed:
+ estimated_time_remaining = compute_estimated_remaining_time(
+ initial_wait, task_info
+ )
+ # An empty task_status means the status request failed.
+ if not task_status:
+ iterations_count += 1
+ if iterations_count > 10:
+ logger.error(
+ "Re_index alert sweep index task has failed: %s/%s",
+ task_info.created,
+ task_info.total,
+ )
+ break
+
+ r.delete("alert_sweep:task_id")
+ return task_info.total
+
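The resumability of this step rests on the Redis keys used above. If a run dies, a manual reset simply deletes the same keys handle() clears at the end; a sketch:

    r = get_redis_interface("CACHE")
    for key in (
        "alert_sweep:query_date",               # the day being processed
        "alert_sweep:task_id",                  # the in-flight reindex task
        "alert_sweep:main_re_index_completed",  # main re-index step marker
        "alert_sweep:rd_re_index_completed",    # RD-only re-index step marker
    ):
        r.delete(key)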
+
+def should_docket_hit_be_included(
+ r: Redis, alert_id: int, docket_id: int, query_date: datetime.date
+) -> bool:
+ """Determine if a Docket alert should be triggered based on its
+ date_modified and if the docket has triggered the alert previously.
+
+ :param r: The Redis interface.
+ :param alert_id: The ID of the alert.
+ :param docket_id: The ID of the docket.
+ :param query_date: The daily re_index query date.
+ :return: True if the Docket alert should be triggered, False otherwise.
+ """
+ docket = Docket.objects.filter(id=docket_id).only("date_modified").first()
+ if not docket:
+ return False
+ if not has_document_alert_hit_been_triggered(r, alert_id, "d", docket_id):
+ # Confirm the docket was modified on the day we're sending alerts.
+ # This avoids triggering docket-only alerts merely because related
+ # RECAPDocuments were indexed that day, since RDs also index docket
+ # fields that can match docket-only queries.
+ date_modified_localized = dt_as_local_date(docket.date_modified)
+ if date_modified_localized == query_date:
+ return True
+ return False
+
+
+def filter_rd_alert_hits(
+ r: Redis,
+ alert_id: int,
+ rd_hits: AttrList,
+ rd_ids: list[int],
+ check_rd_matched: bool = False,
+):
+ """Filter RECAP document hits based on specified conditions.
+
+ :param r: The Redis interface.
+ :param alert_id: The ID of the alert.
+ :param rd_hits: A list of RECAPDocument hits to be processed.
+ :param rd_ids: A list of RECAPDocument IDs that matched the RECAPDocument
+ only query.
+ :param check_rd_matched: A boolean indicating whether to check if the RECAP
+ document hit from the main query also matches the RECAPDocument-only query
+ :return: A list of RECAP document hits that meet all specified conditions.
+ """
+
+ rds_to_send = []
+ for rd_hit in rd_hits:
+ conditions = [
+ not has_document_alert_hit_been_triggered(
+ r, alert_id, "r", rd_hit["_source"]["id"]
+ )
+ ]
+ if check_rd_matched:
+ # Add condition to check if the RD hit is within the RD IDs returned
+ # by the RECAPDocument-only query.
+ conditions.append(rd_hit["_source"]["id"] in rd_ids)
+ if all(conditions):
+ rds_to_send.append(rd_hit)
+ add_document_hit_to_alert_set(
+ r, alert_id, "r", rd_hit["_source"]["id"]
+ )
+ return rds_to_send
+
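The dedupe contract above pairs the two Redis helpers: a document triggers a given alert at most once. A sketch of the invariant, using the helpers exactly as imported above:

    rd_id = 101  # hypothetical RECAPDocument ID
    if not has_document_alert_hit_been_triggered(r, alert_id, "r", rd_id):
        # ...include the hit in the alert email...
        add_document_hit_to_alert_set(r, alert_id, "r", rd_id)
    # A later sweep now filters rd_id out for this alert.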
+
+def query_alerts(
+ search_params: QueryDict,
+) -> tuple[list[Hit] | None, Response | None, Response | None]:
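+ """Query the RECAP sweep indices for an alert query.
+
+ :param search_params: The alert query parameters.
+ :return: A three-tuple: the main query hits, the Docket-only query
+ response, and the RECAPDocument-only query response; or (None, None,
+ None) if the query failed.
+ """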
+ try:
+ search_query = RECAPSweepDocument.search()
+ child_search_query = ESRECAPSweepDocument.search()
+ return do_es_sweep_alert_query(
+ search_query,
+ child_search_query,
+ search_params,
+ )
+ except (
+ UnbalancedParenthesesQuery,
+ UnbalancedQuotesQuery,
+ BadProximityQuery,
+ TransportError,
+ ConnectionError,
+ RequestError,
+ ):
+ traceback.print_exc()
+ logger.info(f"Search for this alert failed: {search_params}\n")
+ return None, None, None
+
+
+def process_alert_hits(
+ r: Redis,
+ results: list[Hit],
+ parent_results: Response | None,
+ child_results: Response | None,
+ alert_id: int,
+ query_date: datetime.date,
+) -> list[Hit]:
+ """Process alert hits by filtering and prepare the results to send based
+ on alert conditions.
+
+ :param r: The Redis instance.
+ :param results: A list of Hit objects containing search results.
+ :param parent_results: The ES Response for the docket-only query.
+ :param child_results: The ES Response for the RECAPDocument-only query.
+ :param alert_id: The ID of the alert being processed.
+ :param query_date: The daily re_index query date.
+ :return: A list of Hit objects that are filtered and prepared to be sent.
+ """
+ docket_hits = parent_results.hits if parent_results else []
+ docket_ids = [int(d.docket_id) for d in docket_hits]
+
+ rd_hits = child_results.hits if child_results else []
+ rd_ids = [int(rd.id) for rd in rd_hits]
+ results_to_send = []
+ if len(results) > 0:
+ for hit in results:
+ if hit.docket_id in docket_ids:
+ # Possible Docket-only alert
+ rds_to_send = filter_rd_alert_hits(
+ r,
+ alert_id,
+ hit["child_docs"],
+ rd_ids,
+ check_rd_matched=True,
+ )
+ if rds_to_send:
+ # Docket OR RECAPDocument alert.
+ hit["child_docs"] = rds_to_send
+ results_to_send.append(hit)
+ if should_docket_hit_be_included(
+ r, alert_id, hit.docket_id, query_date
+ ):
+ add_document_hit_to_alert_set(
+ r, alert_id, "d", hit.docket_id
+ )
+
+ elif should_docket_hit_be_included(
+ r, alert_id, hit.docket_id, query_date
+ ):
+ # Docket-only alert
+ hit["child_docs"] = []
+ results_to_send.append(hit)
+ add_document_hit_to_alert_set(
+ r, alert_id, "d", hit.docket_id
+ )
+ else:
+ # RECAPDocument-only alerts or cross-object alerts
+ rds_to_send = filter_rd_alert_hits(
+ r, alert_id, hit["child_docs"], rd_ids
+ )
+ if rds_to_send:
+ hit["child_docs"] = rds_to_send
+ results_to_send.append(hit)
+ return results_to_send
+
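Summarizing the triage above, each main-query hit falls into one of three outcomes (a condensed, comment-only sketch of the branches):

    # 1. hit.docket_id in docket_ids (matched the docket-only query):
    #    - with child RDs that also matched rd_ids -> send the hit with
    #      those RDs nested (docket OR RECAPDocument alert);
    #    - otherwise, if the docket was modified on query_date -> send the
    #      hit with child_docs emptied (docket-only alert).
    # 2. hit.docket_id not in docket_ids -> RECAPDocument-only or
    #    cross-object alert: send the hit only if at least one child RD
    #    survives the dedupe filter.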
+
+def send_search_alert_webhooks(
+ user: User, results_to_send: list[Hit], alert_id: int
+) -> None:
+ """Send webhook events for search alerts if the user has SEARCH_ALERT
+ endpoints enabled.
+
+ :param user: The user object whose webhooks need to be checked.
+ :param results_to_send: A list of Hit objects that contain the search
+ results to be sent.
+ :param alert_id: The Alert ID to be sent in the webhook.
+ """
+ user_webhooks = user.webhooks.filter(
+ event_type=WebhookEventType.SEARCH_ALERT, enabled=True
+ )
+ for user_webhook in user_webhooks:
+ send_search_alert_webhook_es.delay(
+ results_to_send, user_webhook.pk, alert_id
+ )
+
+
+def query_and_send_alerts(
+ r: Redis, rate: Literal["rt", "dly"], query_date: datetime.date
+) -> None:
+ """Query the sweep index and send alerts based on the specified rate
+ and date.
+
+ :param r: The Redis interface.
+ :param rate: The rate at which to query alerts.
+ :param query_date: The daily re_index query date.
+ :return: None.
+ """
+
+ alert_users = User.objects.filter(
+ alerts__rate=rate
+ ).distinct()
+ alerts_sent_count = 0
+ now_time = timezone.now()
+ for user in alert_users:
+ if rate == Alert.REAL_TIME and not user.profile.is_member:
+ continue
+ alerts = user.alerts.filter(rate=rate, alert_type=SEARCH_TYPES.RECAP)
+ logger.info(f"Running alerts for user '{user}': {alerts}")
+
+ hits = []
+ alerts_to_update = []
+ for alert in alerts:
+ search_params = QueryDict(alert.query.encode(), mutable=True)
+ results, parent_results, child_results = query_alerts(
+ search_params
+ )
+ if not results:
+ continue
+ alerts_to_update.append(alert.pk)
+ search_type = search_params.get("type", SEARCH_TYPES.RECAP)
+ results_to_send = process_alert_hits(
+ r, results, parent_results, child_results, alert.pk, query_date
+ )
+ if not results_to_send:
+ continue
+ hits.append(
+ [
+ alert,
+ search_type,
+ results_to_send,
+ len(results_to_send),
+ ]
+ )
+ alert.query_run = search_params.urlencode() # type: ignore
+ alert.date_last_hit = timezone.now()
+ alert.save()
+
+ # Send webhooks
+ send_search_alert_webhooks(user, results_to_send, alert.pk)
+
+ if hits:
+ send_search_alert_emails.delay([(user.pk, hits)])
+ alerts_sent_count += 1
+
+ # Update Alert's date_last_hit in bulk.
+ Alert.objects.filter(id__in=alerts_to_update).update(
+ date_last_hit=now_time
+ )
+ async_to_sync(tally_stat)(f"alerts.sent.{rate}", inc=alerts_sent_count)
+ logger.info(f"Sent {alerts_sent_count} {rate} email alerts.")
+
+
+def query_and_schedule_alerts(
+ r: Redis, rate: Literal["wly", "mly"], query_date: datetime.date
+) -> None:
+ """Query the sweep index and schedule alerts based on the specified rate
+ and date.
+
+ :param r: The Redis interface.
+ :param rate: The rate at which to query alerts.
+ :param query_date: The daily re_index query date.
+ :return: None.
+ """
+
+ alert_users = User.objects.filter(alerts__rate=rate).distinct()
+ for user in alert_users:
+ alerts = user.alerts.filter(rate=rate, alert_type=SEARCH_TYPES.RECAP)
+ logger.info(f"Running '{rate}' alerts for user '{user}': {alerts}")
+ scheduled_hits_to_create = []
+ for alert in alerts:
+ search_params = QueryDict(alert.query.encode(), mutable=True)
+ results, parent_results, child_results = query_alerts(
+ search_params
+ )
+ if not results:
+ continue
+
+ results_to_send = process_alert_hits(
+ r, results, parent_results, child_results, alert.pk, query_date
+ )
+ if not results_to_send:
+ continue
+ for hit in results_to_send:
+ # Schedule WEEKLY and MONTHLY Alerts.
+ if alert_hits_limit_reached(alert.pk, user.pk):
+ # Skip storing hits for this alert-user combination because
+ # the SCHEDULED_ALERT_HITS_LIMIT has been reached.
+ continue
+
+ child_result_objects = []
+ hit_copy = copy.deepcopy(hit)
+ if hasattr(hit_copy, "child_docs"):
+ for child_doc in hit_copy.child_docs:
+ child_result_objects.append(
+ child_doc["_source"].to_dict()
+ )
+ hit_copy["child_docs"] = child_result_objects
+ scheduled_hits_to_create.append(
+ ScheduledAlertHit(
+ user=user,
+ alert=alert,
+ document_content=hit_copy.to_dict(),
+ )
+ )
+ # Send webhooks
+ send_search_alert_webhooks(user, results_to_send, alert.pk)
+
+ # Create scheduled WEEKLY and MONTHLY Alerts in bulk.
+ if scheduled_hits_to_create:
+ ScheduledAlertHit.objects.bulk_create(scheduled_hits_to_create)
+
+
+class Command(VerboseCommand):
+ """Query and re-index (into the RECAP sweep index) all the RECAP content
+ that has changed during the current period, along with their related
+ documents. Then use the RECAP sweep index to query and send real-time and
+ daily RECAP alerts. Finally, schedule weekly and monthly RECAP alerts.
+ """
+
+ help = "Send RECAP Search Alerts."
+
+ def add_arguments(self, parser):
+ parser.add_argument(
+ "--testing-mode",
+ action="store_true",
+ help="Use this flag for testing purposes.",
+ )
+
+ def handle(self, *args, **options):
+ super().handle(*args, **options)
+ testing_mode = options.get("testing_mode", False)
+ r = get_redis_interface("CACHE")
+ index_daily_recap_documents(
+ r,
+ DocketDocument._index._name,
+ RECAPSweepDocument,
+ testing=testing_mode,
+ )
+ if not testing_mode:
+ # Set the main_re_index_completed key so the main re_index task can
+ # be skipped if the command is re-run after a failure.
+ r.set("alert_sweep:main_re_index_completed", 1, ex=3600 * 12)
+ index_daily_recap_documents(
+ r,
+ DocketDocument._index._name,
+ ESRECAPSweepDocument,
+ testing=testing_mode,
+ only_rd=True,
+ )
+ if not testing_mode:
+ # Set the rd_re_index_completed key so the RECAPDocument re_index
+ # task can be skipped if the command is re-run after a failure.
+ r.set("alert_sweep:rd_re_index_completed", 1, ex=3600 * 12)
+
+ query_date = timezone.localtime(
+ timezone.make_aware(
+ datetime.datetime.fromisoformat(
+ str(r.get("alert_sweep:query_date"))
+ )
+ )
+ ).date()
+ query_and_send_alerts(r, Alert.REAL_TIME, query_date)
+ query_and_send_alerts(r, Alert.DAILY, query_date)
+ query_and_schedule_alerts(r, Alert.WEEKLY, query_date)
+ query_and_schedule_alerts(r, Alert.MONTHLY, query_date)
+ r.delete("alert_sweep:main_re_index_completed")
+ r.delete("alert_sweep:rd_re_index_completed")
+ r.delete("alert_sweep:query_date")
diff --git a/cl/alerts/management/commands/cl_send_scheduled_alerts.py b/cl/alerts/management/commands/cl_send_scheduled_alerts.py
index 8fefd675f3..e0b23c8420 100644
--- a/cl/alerts/management/commands/cl_send_scheduled_alerts.py
+++ b/cl/alerts/management/commands/cl_send_scheduled_alerts.py
@@ -99,7 +99,9 @@ def query_and_send_alerts_by_rate(rate: str) -> None:
)
)
if hits:
- send_search_alert_emails.delay([(user_id, hits)])
+ send_search_alert_emails.delay(
+ [(user_id, hits)], scheduled_alert=True
+ )
alerts_sent_count += 1
# Update Alert's date_last_hit in bulk.
diff --git a/cl/alerts/management/commands/clean_up_search_alerts.py b/cl/alerts/management/commands/clean_up_search_alerts.py
index b00d7128a3..df8a28b2d6 100644
--- a/cl/alerts/management/commands/clean_up_search_alerts.py
+++ b/cl/alerts/management/commands/clean_up_search_alerts.py
@@ -75,7 +75,8 @@ def validate_queries_syntax(options: OptionsType) -> None:
if search_form.is_valid():
cd = search_form.cleaned_data
try:
- s, _ = build_es_base_query(search_query, cd)
+ es_queries = build_es_base_query(search_query, cd)
+ s = es_queries.search_query
s = s.extra(size=0)
s.execute().to_dict()
# Waiting between requests to avoid hammering ES too quickly.
diff --git a/cl/alerts/tasks.py b/cl/alerts/tasks.py
index da43d8155d..e3aad0a071 100644
--- a/cl/alerts/tasks.py
+++ b/cl/alerts/tasks.py
@@ -25,7 +25,7 @@
from cl.api.models import WebhookEventType
from cl.api.tasks import (
send_docket_alert_webhook_events,
- send_es_search_alert_webhook,
+ send_search_alert_webhook_es,
)
from cl.celery_init import app
from cl.custom_filters.templatetags.text_filters import best_case_name
@@ -458,22 +458,25 @@ def send_webhook_alert_hits(
event_type=WebhookEventType.SEARCH_ALERT, enabled=True
)
for user_webhook in user_webhooks:
- send_es_search_alert_webhook.delay(
+ send_search_alert_webhook_es.delay(
documents,
user_webhook.pk,
- alert,
+ alert.pk,
)
@app.task(ignore_result=True)
def send_search_alert_emails(
- email_alerts_to_send: list[tuple[int, list[SearchAlertHitType]]]
+ email_alerts_to_send: list[tuple[int, list[SearchAlertHitType]]],
+ scheduled_alert: bool = False,
) -> None:
"""Send search alert emails for multiple users.
:param email_alerts_to_send: A list of two tuples containing the user to
whom the alerts should be sent. A list of tuples containing the Search
Alert, (Alert, search type, documents, and number of documents)
+ :param scheduled_alert: A boolean indicating whether this alert has been
+ scheduled.
:return: None
"""
@@ -491,6 +494,7 @@ def send_search_alert_emails(
context = {
"hits": hits,
"hits_limit": settings.SCHEDULED_ALERT_HITS_LIMIT,
+ "scheduled_alert": scheduled_alert,
}
headers = {}
query_string = ""
diff --git a/cl/alerts/templates/alert_email_es.html b/cl/alerts/templates/alert_email_es.html
index 36ccee5ccc..dc2f797268 100644
--- a/cl/alerts/templates/alert_email_es.html
+++ b/cl/alerts/templates/alert_email_es.html
@@ -25,7 +25,7 @@
- Your {{alert.get_rate_display|lower}} {% if type == 'o' %}opinion{% elif type == 'oa' %}oral argument{% endif %} alert — {{alert.name}} — had {{num_results}}{% if num_results >= hits_limit %}+{% endif %} hit{{results|pluralize}}:
+ Your {{alert.get_rate_display|lower}} {% if type == 'o' %}opinion{% elif type == 'oa' %}oral argument{% elif type == 'r' %}RECAP{% endif %} alert — {{alert.name}} — had {{num_results}}{% if num_results >= hits_limit %}+{% endif %} hit{{results|pluralize}}:
View Full Results / Edit this Alert
@@ -34,28 +34,66 @@
-
- View original:
-
- {% if result.download_url %}
-
- From the court
+ {% if type == 'r' %}
+ {% if result.docketNumber %}
+ Docket Number:
+ {{ result|get_highlight:"docketNumber"|safe }}
+ {% endif %}
+
+ Date Filed:
+ {% if result.dateFiled %}
+ {{ result.dateFiled|date:"F jS, Y" }}
+ {% else %}
+ Unknown Date
+ {% endif %}
+
+ {% if result.child_docs and result.child_remaining %}
+ {% extract_q_value alert.query_run as q_value %}
+
+ View Additional Results for this Case
+
+
+ {% endif %}
+ {% else %}
+
+
+ View original:
+
+ {% if result.download_url %}
+
+ From the court
+
+ |
+ {% endif %}
+ {% if result.local_path %}
+ {# Provide link to S3. #}
+
+ Our backup
- |
{% endif %}
- {% if result.local_path %}
- {# Provide link to S3. #}
-
- Our backup
-
+
{% endif %}
-
{% if type == 'oa' %}
Date Argued:
diff --git a/cl/alerts/templates/alert_email_es.txt b/cl/alerts/templates/alert_email_es.txt
index db8363e713..2b7ec3b569 100644
--- a/cl/alerts/templates/alert_email_es.txt
+++ b/cl/alerts/templates/alert_email_es.txt
@@ -10,16 +10,23 @@ CourtListener.com
We have news regarding your alerts at CourtListener.com
-------------------------------------------------------
-{% for alert, type, results, num_results in hits %}{% for result in results %}{% if forloop.first %}Your {{alert.get_rate_display|lower}} {% if type == 'o' %}opinion{% elif type == 'oa' %}oral argument{% endif %} alert -- {{alert.name}} -- had {{num_results}}{% if num_results >= hits_limit %}+{% endif %} hit{{results|pluralize}}:
+{% for alert, type, results, num_results in hits %}{% for result in results %}{% if forloop.first %}Your {{alert.get_rate_display|lower}} {% if type == 'o' %}opinion{% elif type == 'oa' %}oral argument{% elif type == 'r' %}RECAP{% endif %} alert -- {{alert.name}} -- had {{num_results}}{% if num_results >= hits_limit %}+{% endif %} hit{{results|pluralize}}:
-------------------------------------------------------
View Full Results / Edit this Alert: https://www.courtlistener.com/?{{ alert.query_run|safe }}&edit_alert={{ alert.pk }}
Disable this Alert (one click): https://www.courtlistener.com{% url "disable_alert" alert.secret_key %}{% endif %}
-{{forloop.counter}}. {{ result.caseName|render_string_or_list|safe|striptags }} ({% if result.court_id != 'scotus' %}{{ result.court_citation_string|render_string_or_list|striptags }} {% endif %}{% if type == 'o' %}{{ result.dateFiled|date:"Y" }}{% elif type == 'oa' %}{{ result.dateArgued|date:"Y" }}{% endif %})
+{{forloop.counter}}. {{ result.caseName|render_string_or_list|safe|striptags }} ({% if result.court_id != 'scotus' %}{{ result.court_citation_string|render_string_or_list|striptags }} {% endif %}{% if type == 'o' or type == 'r' %}{{ result.dateFiled|date:"Y" }}{% elif type == 'oa' %}{{ result.dateArgued|date:"Y" }}{% endif %})
{% if type == 'oa' %}{% if result.dateArgued %}Date Argued: {{ result.dateArgued|date:"F jS, Y" }}{% else %}Date Argued: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %} | Duration: {{ result.duration|naturalduration }}{% if result.judge %} | Judge: {{ result.judge|render_string_or_list|safe|striptags|underscore_to_space }}{% endif %}{% endif %}
-{% if type == 'o' or type == 'oa' %}{% if result|get_highlight:"text" %}...{{ result|get_highlight:"text"|safe|striptags|underscore_to_space|compress_whitespace }}...{% endif %}
+{% if type == 'o' or type == 'oa' %}{% if result|get_highlight:"text" %}...{{ result|get_highlight:"text"|safe|striptags|underscore_to_space|compress_whitespace }}...{% endif %}{% endif %}
+{% if type == 'r' %}{% if result.dateFiled %}Date Filed: {{ result.dateFiled|date:"F jS, Y" }}{% else %}Date Filed: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %}
+{% for doc in result.child_docs %}{% with doc=doc|get_es_doc_content:scheduled_alert %} - {% if doc.short_description %}{{ doc.short_description|render_string_or_list|safe|striptags }} - {% endif %}Document #{% if doc.document_number %}{{ doc.document_number }}{% endif %}{% if doc.attachment_number %}, Attachment #{{ doc.attachment_number }}{% endif %}
+ {% if doc.description %}Description: {{ doc.description|render_string_or_list|safe|striptags }}{% endif %}
+ {% if doc.plain_text %}{% contains_highlights doc.plain_text.0 True as highlighted %}{% if highlighted %}...{% endif %}{{ doc.plain_text|render_string_or_list|safe|striptags|underscore_to_space }}...{% endif %}
+ View this document on our site: https://www.courtlistener.com{% if doc.absolute_url %}{{ doc.absolute_url }}{% else %}{{ result.docket_absolute_url }}#minute-entry-{{ doc.docket_entry_id }}{% endif %}
+{% endwith %}{% endfor %}
+{% if result.child_docs and result.child_remaining %}{% extract_q_value alert.query_run as q_value %}View Additional Results for this Case: https://www.courtlistener.com/?type={{ type|urlencode }}&q={% if q_value %}({{ q_value|urlencode }})%20AND%20{% endif %}docket_id%3A{{ result.docket_id|urlencode }}{% endif %}
{% endif %}~~~~~
- - View this item on our site: https://www.courtlistener.com{{result.absolute_url}}
+ - View this item on our site: https://www.courtlistener.com{% if type == 'r' %}{{result.docket_absolute_url}}{% else %}{{result.absolute_url}}{% endif %}
{% if result.download_url %} - Download original from the court: {{result.download_url}}
{% endif %}{% if result.local_path %} - Download the original from our backup: https://storage.courtlistener.com/{{ result.local_path }}{% endif %}{% endfor %}
diff --git a/cl/alerts/tests/__init__.py b/cl/alerts/tests/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/cl/alerts/tests.py b/cl/alerts/tests/tests.py
similarity index 100%
rename from cl/alerts/tests.py
rename to cl/alerts/tests/tests.py
diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py
new file mode 100644
index 0000000000..8e57da973a
--- /dev/null
+++ b/cl/alerts/tests/tests_recap_alerts.py
@@ -0,0 +1,1627 @@
+import datetime
+from unittest import mock
+
+import time_machine
+from asgiref.sync import sync_to_async
+from django.core import mail
+from django.core.management import call_command
+from django.test.utils import override_settings
+from django.urls import reverse
+from django.utils.html import strip_tags
+from django.utils.timezone import now
+from elasticsearch_dsl import Q
+from lxml import html
+
+from cl.alerts.factories import AlertFactory
+from cl.alerts.management.commands.cl_send_recap_alerts import (
+ index_daily_recap_documents,
+)
+from cl.alerts.models import SEARCH_TYPES, Alert, ScheduledAlertHit
+from cl.api.factories import WebhookFactory
+from cl.api.models import WebhookEvent, WebhookEventType
+from cl.donate.models import NeonMembership
+from cl.lib.elasticsearch_utils import do_es_sweep_alert_query
+from cl.lib.redis_utils import get_redis_interface
+from cl.lib.test_helpers import RECAPSearchTestCase
+from cl.search.documents import DocketDocument, RECAPSweepDocument
+from cl.search.factories import (
+ DocketEntryWithParentsFactory,
+ DocketFactory,
+ RECAPDocumentFactory,
+)
+from cl.search.models import Docket
+from cl.tests.cases import ESIndexTestCase, TestCase
+from cl.tests.utils import MockResponse
+from cl.users.factories import UserProfileWithParentsFactory
+
+
+class RECAPAlertsSweepIndexTest(
+ RECAPSearchTestCase, ESIndexTestCase, TestCase
+):
+ """
+ RECAP Alerts Sweep Index Tests
+ """
+
+ @classmethod
+ def setUpTestData(cls):
+ cls.rebuild_index("people_db.Person")
+ cls.rebuild_index("search.Docket")
+ cls.mock_date = now()
+ with time_machine.travel(cls.mock_date, tick=False):
+ super().setUpTestData()
+ call_command(
+ "cl_index_parent_and_child_docs",
+ search_type=SEARCH_TYPES.RECAP,
+ queue="celery",
+ pk_offset=0,
+ testing_mode=True,
+ )
+
+ cls.user_profile = UserProfileWithParentsFactory()
+ NeonMembership.objects.create(
+ level=NeonMembership.LEGACY, user=cls.user_profile.user
+ )
+ cls.user_profile_2 = UserProfileWithParentsFactory()
+ NeonMembership.objects.create(
+ level=NeonMembership.LEGACY, user=cls.user_profile_2.user
+ )
+ cls.user_profile_no_member = UserProfileWithParentsFactory()
+ cls.webhook_enabled = WebhookFactory(
+ user=cls.user_profile.user,
+ event_type=WebhookEventType.SEARCH_ALERT,
+ url="https://example.com/",
+ enabled=True,
+ )
+
+ def setUp(self):
+ self.r = get_redis_interface("CACHE")
+ self.r.delete("alert_sweep:query_date")
+ self.r.delete("alert_sweep:task_id")
+ keys = self.r.keys("alert_hits:*")
+ if keys:
+ self.r.delete(*keys)
+
+ @staticmethod
+ def get_html_content_from_email(email_content):
+ html_content = None
+ for content, content_type in email_content.alternatives:
+ if content_type == "text/html":
+ html_content = content
+ break
+ return html_content
+
+ def _confirm_number_of_alerts(self, html_content, expected_count):
+ """Test the number of alerts included in the email alert."""
+ tree = html.fromstring(html_content)
+ got = len(tree.xpath("//h2"))
+
+ self.assertEqual(
+ got,
+ expected_count,
+ msg="Did not get the right number of alerts in the email. "
+ "Expected: %s - Got: %s\n\n" % (expected_count, got),
+ )
+
+ @staticmethod
+ def _extract_cases_from_alert(html_tree, alert_title):
+ """Extract the case elements (h3) under a specific alert (h2) from the
+ HTML tree.
+ """
+ alert_element = html_tree.xpath(
+ f"//h2[contains(text(), '{alert_title}')]"
+ )
+ h2_elements = html_tree.xpath("//h2")
+ alert_index = h2_elements.index(alert_element[0])
+ # Find the <h3> elements between this <h2> and the next <h2>
+ if alert_index + 1 < len(h2_elements):
+ next_alert_element = h2_elements[alert_index + 1]
+ alert_cases = html_tree.xpath(
+ f"//h2[contains(text(), '{alert_title}')]/following-sibling::*[following-sibling::h2[1] = '{next_alert_element.text}'][self::h3]"
+ )
+ else:
+ alert_cases = html_tree.xpath(
+ f"//h2[contains(text(), '{alert_title}')]/following-sibling::h3"
+ )
+ return alert_cases
+
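These XPath helpers assume the email's structure: one <h2> per alert, an <h3> per case beneath it, and a <ul> of child-document links after each case. A minimal illustrative fragment (not real template output):

    sample = """
    <h2>Test Alert RECAP Only</h2>
    <h3>SUBPOENAS SERVED CASE</h3>
    <ul><li><a href="#">Motion to File \u2014 Document #1</a></li></ul>
    <h2>Another Alert</h2>
    """
    tree = html.fromstring(sample)
    # _extract_cases_from_alert(tree, "Test Alert RECAP Only") -> [that <h3>]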
+ def _count_alert_hits_and_child_hits(
+ self,
+ html_content,
+ alert_title,
+ expected_hits,
+ case_title,
+ expected_child_hits,
+ ):
+ """Confirm the following assertions for the email alert:
+ - A specific alert is included in the email alert.
+ - The specified alert contains the expected number of hits.
+ - The specified case contains the expected number of child hits.
+ """
+ tree = html.fromstring(html_content)
+ alert_element = tree.xpath(f"//h2[contains(text(), '{alert_title}')]")
+ self.assertTrue(
+ alert_element, msg=f"Not alert with title {alert_title} found."
+ )
+ alert_cases = self._extract_cases_from_alert(tree, alert_title)
+ self.assertEqual(
+ len(alert_cases),
+ expected_hits,
+ msg="Did not get the right number of hits for the alert %s. "
+ "Expected: %s - Got: %s\n\n"
+ % (alert_title, expected_hits, len(alert_cases)),
+ )
+ if case_title:
+ for case in alert_cases:
+ child_hit_count = 0
+ case_text = " ".join(
+ [element.strip() for element in case.xpath(".//text()")]
+ )
+ if case_title in case_text:
+ child_hit_count = len(
+ case.xpath("following-sibling::ul[1]/li/a")
+ )
+ self.assertEqual(
+ child_hit_count,
+ expected_child_hits,
+ msg="Did not get the right number of child hits for the case %s. "
+ "Expected: %s - Got: %s\n\n"
+ % (case_title, expected_child_hits, child_hit_count),
+ )
+ break
+
+ def _assert_child_hits_content(
+ self,
+ html_content,
+ alert_title,
+ case_title,
+ expected_child_descriptions,
+ ):
+ """Confirm the child hits in a case are the expected ones, comparing
+ their descriptions.
+ """
+ tree = html.fromstring(html_content)
+ alert_element = tree.xpath(f"//h2[contains(text(), '{alert_title}')]")
+ # Find the corresponding case_title under the alert_element
+ alert_cases = self._extract_cases_from_alert(tree, alert_title)
+
+ def extract_child_descriptions(case_item):
+ child_documents = case_item.xpath("./following-sibling::ul[1]/li")
+ results = []
+ for li in child_documents:
+ a_tag = li.xpath(".//a")[0]
+ full_text = a_tag.text_content()
+ first_part = full_text.split("\u2014")[0].strip()
+ results.append(first_part)
+
+ return results
+
+ child_descriptions = set()
+ for case in alert_cases:
+ case_text = "".join(case.xpath(".//text()")).strip()
+ if case_title in case_text:
+ child_descriptions = set(extract_child_descriptions(case))
+ break
+
+ self.assertEqual(
+ child_descriptions,
+ set(expected_child_descriptions),
+ msg=f"Child hits didn't match for case {case_title}, Got {child_descriptions}, Expected: {expected_child_descriptions} ",
+ )
+
+ def _count_webhook_hits_and_child_hits(
+ self,
+ webhooks,
+ alert_title,
+ expected_hits,
+ case_title,
+ expected_child_hits,
+ ):
+ """Confirm the following assertions for the search alert webhook:
+ - A specific alert webhook was triggered.
+ - The specified alert contains the expected number of hits.
+ - The specified case contains the expected number of child hits.
+ """
+
+ for webhook in webhooks:
+ if webhook["payload"]["alert"]["name"] == alert_title:
+ webhook_cases = webhook["payload"]["results"]
+ self.assertEqual(
+ len(webhook_cases),
+ expected_hits,
+ msg=f"Did not get the right number of hits for the alert %s. "
+ % alert_title,
+ )
+ for case in webhook["payload"]["results"]:
+ if case_title == strip_tags(case["caseName"]):
+ self.assertEqual(
+ len(case["recap_documents"]),
+ expected_child_hits,
+ msg=f"Did not get the right number of child documents for the case %s. "
+ % case_title,
+ )
+
+ def test_filter_recap_alerts_to_send(self) -> None:
+ """Test filter RECAP alerts that met the conditions to be sent:
+ - RECAP type alert.
+ - RT or DLY rate
+ - For RT rate the user must have an active membership.
+ """
+
+ rt_recap_alert = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.REAL_TIME,
+ name="Test RT RECAP Alert",
+ query='q="401 Civil"&type=r',
+ )
+ dly_recap_alert = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.DAILY,
+ name="Test DLY RECAP Alert",
+ query='q="401 Civil"&type=r',
+ )
+ AlertFactory(
+ user=self.user_profile_2.user,
+ rate=Alert.REAL_TIME,
+ name="Test RT RECAP Alert",
+ query='q="401 Civil"',
+ )
+ AlertFactory(
+ user=self.user_profile_no_member.user,
+ rate=Alert.REAL_TIME,
+ name="Test RT RECAP Alert no Member",
+ query='q="401 Civil"&type=r',
+ )
+
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+
+ # Only the RECAP RT alert for a member and the RECAP DLY alert are sent.
+ self.assertEqual(
+ len(mail.outbox), 2, msg="Outgoing emails don't match."
+ )
+ html_content = self.get_html_content_from_email(mail.outbox[0])
+ self.assertIn(rt_recap_alert.name, html_content)
+
+ html_content = self.get_html_content_from_email(mail.outbox[1])
+ self.assertIn(dly_recap_alert.name, html_content)
+
+ def test_index_daily_recap_documents(self) -> None:
+ """Test index_daily_recap_documents method over different documents
+ conditions.
+ """
+ RECAPSweepDocument._index.delete(ignore=404)
+ RECAPSweepDocument.init()
+ recap_search = DocketDocument.search()
+ recap_dockets = recap_search.query(Q("match", docket_child="docket"))
+ self.assertEqual(recap_dockets.count(), 2)
+
+ recap_documents = recap_search.query(
+ Q("match", docket_child="recap_document")
+ )
+ self.assertEqual(recap_documents.count(), 3)
+
+ sweep_search = RECAPSweepDocument.search()
+ self.assertEqual(
+ sweep_search.count(),
+ 0,
+ msg="Wrong number of documents in the sweep index.",
+ )
+
+ # Index documents based on Dockets changed today + all their
+ # RECAPDocuments indexed the same day.
+ with time_machine.travel(self.mock_date, tick=False):
+ documents_indexed = index_daily_recap_documents(
+ self.r,
+ DocketDocument._index._name,
+ RECAPSweepDocument,
+ testing=True,
+ )
+ self.assertEqual(
+ documents_indexed, 5, msg="Wrong number of documents indexed."
+ )
+
+ sweep_search = RECAPSweepDocument.search()
+ dockets_sweep = sweep_search.query(Q("match", docket_child="docket"))
+ self.assertEqual(dockets_sweep.count(), 2)
+
+ documents_sweep = sweep_search.query(
+ Q("match", docket_child="recap_document")
+ )
+ self.assertEqual(documents_sweep.count(), 3)
+
+ # Index a Docket changed today + its RECAPDocuments indexed on
+ # previous days.
+ with time_machine.travel(self.mock_date, tick=False):
+ docket = DocketFactory(
+ court=self.court,
+ case_name="SUBPOENAS SERVED CASE",
+ docket_number="1:21-bk-1234",
+ source=Docket.RECAP,
+ )
+ call_command(
+ "cl_index_parent_and_child_docs",
+ search_type=SEARCH_TYPES.RECAP,
+ queue="celery",
+ pk_offset=0,
+ testing_mode=True,
+ )
+
+ # Its related RD is ingested two days before.
+ two_days_before = now() - datetime.timedelta(days=2)
+ mock_two_days_before = two_days_before.replace(hour=5)
+ with time_machine.travel(mock_two_days_before, tick=False):
+ alert_de = DocketEntryWithParentsFactory(
+ docket=docket,
+ entry_number=1,
+ date_filed=datetime.date(2024, 8, 19),
+ description="MOTION for Leave to File Amicus Curiae Lorem Served",
+ )
+ rd = RECAPDocumentFactory(
+ docket_entry=alert_de,
+ description="Motion to File",
+ document_number="1",
+ is_available=True,
+ )
+ call_command(
+ "cl_index_parent_and_child_docs",
+ search_type=SEARCH_TYPES.RECAP,
+ queue="celery",
+ pk_offset=0,
+ testing_mode=True,
+ )
+
+ # Run the indexer.
+ with time_machine.travel(self.mock_date, tick=False):
+ documents_indexed = index_daily_recap_documents(
+ self.r,
+ DocketDocument._index._name,
+ RECAPSweepDocument,
+ testing=True,
+ )
+ self.assertEqual(
+ documents_indexed, 7, msg="Wrong number of documents indexed."
+ )
+
+ # Index a RECAPDocument changed today including its parent Docket
+ # indexed on previous days.
+ with time_machine.travel(mock_two_days_before, tick=False):
+ docket_2 = DocketFactory(
+ court=self.court,
+ case_name="SUBPOENAS SERVED CASE OFF",
+ docket_number="1:21-bk-1250",
+ source=Docket.RECAP,
+ )
+ call_command(
+ "cl_index_parent_and_child_docs",
+ search_type=SEARCH_TYPES.RECAP,
+ queue="celery",
+ pk_offset=0,
+ testing_mode=True,
+ )
+
+ # Its related RD is ingested today.
+ with time_machine.travel(self.mock_date, tick=False):
+ alert_de_2 = DocketEntryWithParentsFactory(
+ docket=docket_2,
+ entry_number=1,
+ date_filed=datetime.date(2024, 8, 19),
+ description="MOTION for Leave to File Amicus Curiae Lorem Served",
+ )
+ rd_2 = RECAPDocumentFactory(
+ docket_entry=alert_de_2,
+ description="Motion to File Lorem",
+ document_number="2",
+ )
+ call_command(
+ "cl_index_parent_and_child_docs",
+ search_type=SEARCH_TYPES.RECAP,
+ queue="celery",
+ pk_offset=0,
+ testing_mode=True,
+ )
+
+ # Run the indexer.
+ with time_machine.travel(self.mock_date, tick=False):
+ documents_indexed = index_daily_recap_documents(
+ self.r,
+ DocketDocument._index._name,
+ RECAPSweepDocument,
+ testing=True,
+ )
+ self.assertEqual(
+ documents_indexed, 9, msg="Wrong number of documents indexed."
+ )
+
+ # Docket and RD created on previous days will be used later to confirm
+ # documents got indexed into the sweep index after partial updates.
+ three_days_before = now() - datetime.timedelta(days=3)
+ mock_three_days_before = three_days_before.replace(hour=5)
+ with time_machine.travel(
+ mock_three_days_before, tick=False
+ ), self.captureOnCommitCallbacks(execute=True):
+ docket_old = DocketFactory(
+ court=self.court,
+ case_name="SUBPOENAS SERVED LOREM OFF",
+ docket_number="1:21-bk-1254",
+ source=Docket.RECAP,
+ )
+ alert_de_old = DocketEntryWithParentsFactory(
+ docket=docket_old,
+ entry_number=1,
+ date_filed=datetime.date(2024, 8, 19),
+ description="MOTION for Leave to File Amicus Curiae Lorem Served",
+ )
+ rd_old = RECAPDocumentFactory(
+ docket_entry=alert_de_old,
+ description="Motion to File",
+ document_number="1",
+ is_available=True,
+ )
+ rd_old_2 = RECAPDocumentFactory(
+ docket_entry=alert_de_old,
+ description="Motion to File 2",
+ document_number="2",
+ is_available=True,
+ )
+
+ # Run the indexer. No new documents re_indexed.
+ with time_machine.travel(self.mock_date, tick=False):
+ documents_indexed = index_daily_recap_documents(
+ self.r,
+ DocketDocument._index._name,
+ RECAPSweepDocument,
+ testing=True,
+ )
+ self.assertEqual(
+ documents_indexed, 9, msg="Wrong number of documents indexed."
+ )
+
+ # Update the documents today:
+ with time_machine.travel(
+ self.mock_date, tick=False
+ ), self.captureOnCommitCallbacks(execute=True):
+ rd_old_2.document_number = 3
+ rd_old_2.save()
+
+ # Run the indexer. The RD updated today and its parent Docket are
+ # re_indexed.
+ with time_machine.travel(self.mock_date, tick=False):
+ documents_indexed = index_daily_recap_documents(
+ self.r,
+ DocketDocument._index._name,
+ RECAPSweepDocument,
+ testing=True,
+ )
+ self.assertEqual(
+ documents_indexed, 11, msg="Wrong number of documents indexed."
+ )
+
+ # Update the Docket today:
+ with time_machine.travel(
+ self.mock_date, tick=False
+ ), self.captureOnCommitCallbacks(execute=True):
+ docket_old.case_name = "SUBPOENAS SERVED LOREM OFF UPDATED"
+ docket_old.save()
+
+ # Run the indexer. The Docket updated today is re_indexed.
+ with time_machine.travel(self.mock_date, tick=False):
+ documents_indexed = index_daily_recap_documents(
+ self.r,
+ DocketDocument._index._name,
+ RECAPSweepDocument,
+ testing=True,
+ )
+ self.assertEqual(
+ documents_indexed, 12, msg="Wrong number of documents indexed."
+ )
+
+ docket_old.delete()
+ docket.delete()
+ docket_2.delete()
+
+ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None:
+ """Test RECAP alerts can be properly filtered out according to
+ their query and hits matched conditions.
+
+ - Docket-only Alerts should be triggered only if the Docket was
+ modified on the day. This prevents sending Alerts due to related
+ RDs added on the same day which can match the query due to parent
+ fields indexed into the RDs.
+ - The Docket or RD shouldn’t have triggered the alert previously.
+ - RECAP-only Alerts should only include RDs that have not triggered the
+ same alert previously. If there are no hits after filtering RDs,
+ don’t send the alert.
+ - Cross-object queries should only include RDs that have not triggered
+ the same alert previously. If there are no hits after filtering RDs,
+ don’t send the alert.
+
+ Assert the content structure accordingly.
+ """
+
+ # This docket-only alert matches a Docket ingested today.
+ docket_only_alert = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.REAL_TIME,
+ name="Test Alert Docket Only",
+ query='q="401 Civil"&type=r',
+ )
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ), time_machine.travel(self.mock_date, tick=False):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+
+ self.assertEqual(
+ len(mail.outbox), 1, msg="Outgoing emails don't match."
+ )
+ html_content = self.get_html_content_from_email(mail.outbox[0])
+ self.assertIn(docket_only_alert.name, html_content)
+ self._confirm_number_of_alerts(html_content, 1)
+ # The docket-only alert doesn't contain any nested child hits.
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ docket_only_alert.name,
+ 1,
+ self.de.docket.case_name,
+ 0,
+ )
+
+ # Assert email text version:
+ txt_email = mail.outbox[0].body
+ self.assertIn(docket_only_alert.name, txt_email)
+
+ # The following test shouldn't match the Docket-only alert when the RD
+ # is added today since its parent Docket was not modified today.
+ AlertFactory(
+ user=self.user_profile_2.user,
+ rate=Alert.REAL_TIME,
+ name="Test Alert Docket Only Not Triggered",
+ query='q="405 Civil"&type=r',
+ )
+ # Simulate docket is ingested a day before.
+ one_day_before = self.mock_date - datetime.timedelta(days=1)
+ with time_machine.travel(one_day_before, tick=False):
+ docket = DocketFactory(
+ court=self.court,
+ case_name="SUBPOENAS SERVED CASE",
+ case_name_full="Jackson & Sons Holdings vs. Bank",
+ docket_number="1:21-bk-1234",
+ nature_of_suit="440",
+ source=Docket.RECAP,
+ cause="405 Civil",
+ jurisdiction_type="'U.S. Government Defendant",
+ jury_demand="1,000,000",
+ )
+
+ # Its related RD is ingested today.
+ with time_machine.travel(self.mock_date, tick=False):
+ alert_de = DocketEntryWithParentsFactory(
+ docket=docket,
+ entry_number=1,
+ date_filed=datetime.date(2024, 8, 19),
+ description="MOTION for Leave to File Amicus Curiae Lorem Served",
+ )
+ rd = RECAPDocumentFactory(
+ docket_entry=alert_de,
+ description="Motion to File",
+ document_number="1",
+ is_available=True,
+ page_count=5,
+ pacer_doc_id="018036652436",
+ plain_text="plain text for 018036652436",
+ )
+ call_command(
+ "cl_index_parent_and_child_docs",
+ search_type=SEARCH_TYPES.RECAP,
+ queue="celery",
+ pk_offset=0,
+ testing_mode=True,
+ )
+
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ), time_machine.travel(self.mock_date, tick=False):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+ # The RD ingested today shouldn't match the docket-only alert.
+ self.assertEqual(
+ len(mail.outbox), 1, msg="Outgoing emails don't match."
+ )
+
+ # Test a RECAP-only alert query.
+ recap_only_alert = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.REAL_TIME,
+ name="Test Alert RECAP Only",
+ query='q="plain text for 018036652436"&type=r',
+ )
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ), time_machine.travel(self.mock_date, tick=False):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+ # 1 New alert should be triggered.
+ self.assertEqual(
+ len(mail.outbox), 2, msg="Outgoing emails don't match."
+ )
+ html_content = self.get_html_content_from_email(mail.outbox[1])
+ self._confirm_number_of_alerts(html_content, 1)
+ # Only one child hit should be included in the case within the alert.
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ recap_only_alert.name,
+ 1,
+ alert_de.docket.case_name,
+ 1,
+ )
+ self._assert_child_hits_content(
+ html_content,
+ recap_only_alert.name,
+ alert_de.docket.case_name,
+ [rd.description],
+ )
+ # Assert email text version:
+ txt_email = mail.outbox[1].body
+ self.assertIn(recap_only_alert.name, txt_email)
+ self.assertIn(rd.description, txt_email)
+
+ # Trigger the same alert again to confirm that no new alert is
+ # triggered because previous hits have already triggered the same alert
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ), time_machine.travel(self.mock_date, tick=False):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+ # No new alert should be triggered.
+ self.assertEqual(
+ len(mail.outbox), 2, msg="Outgoing emails don't match."
+ )
+
+ with time_machine.travel(self.mock_date, tick=False):
+ # Create a new RD for the same DocketEntry to confirm this new RD is
+ # properly included in the alert email.
+ rd_2 = RECAPDocumentFactory(
+ docket_entry=alert_de,
+ description="Motion to File 2",
+ document_number="2",
+ is_available=True,
+ page_count=3,
+ pacer_doc_id="018036652436",
+ plain_text="plain text for 018036652436",
+ )
+ call_command(
+ "cl_index_parent_and_child_docs",
+ search_type=SEARCH_TYPES.RECAP,
+ queue="celery",
+ pk_offset=0,
+ testing_mode=True,
+ )
+
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ), time_machine.travel(self.mock_date, tick=False):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+
+ # A new alert should be triggered containing only the new RD created.
+ self.assertEqual(
+ len(mail.outbox), 3, msg="Outgoing emails don't match."
+ )
+ html_content = self.get_html_content_from_email(mail.outbox[2])
+ self._confirm_number_of_alerts(html_content, 1)
+ self._assert_child_hits_content(
+ html_content,
+ recap_only_alert.name,
+ alert_de.docket.case_name,
+ [rd_2.description],
+ )
+
+ # The following test confirms that hits previously matched with other
+ # alerts can match a different alert.
+ recap_only_alert_2 = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.REAL_TIME,
+ name="Test Alert RECAP Only Docket Entry",
+ query=f"q=docket_entry_id:{alert_de.pk}&type=r",
+ )
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ), time_machine.travel(self.mock_date, tick=False):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+
+ # A new alert should be triggered containing two RDs (rd and rd_2)
+ self.assertEqual(
+ len(mail.outbox), 4, msg="Outgoing emails don't match."
+ )
+ html_content = self.get_html_content_from_email(mail.outbox[3])
+ self._confirm_number_of_alerts(html_content, 1)
+ self._assert_child_hits_content(
+ html_content,
+ recap_only_alert_2.name,
+ alert_de.docket.case_name,
+ [rd.description, rd_2.description],
+ )
+ # Assert email text version:
+ txt_email = mail.outbox[3].body
+ self.assertIn(recap_only_alert_2.name, txt_email)
+ self.assertIn(rd.description, txt_email)
+ self.assertIn(rd_2.description, txt_email)
+
+ # The following test confirms that a cross-object alert is properly
+ # matched and triggered
+ cross_object_alert = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.REAL_TIME,
+ name="Test Alert Cross-object query",
+ query=f'q="Motion to File 2"&docket_number={docket.docket_number}&type=r',
+ )
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ), time_machine.travel(self.mock_date, tick=False):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+
+ # A new alert should be triggered containing one RD (rd_2)
+ self.assertEqual(
+ len(mail.outbox), 5, msg="Outgoing emails don't match."
+ )
+ html_content = self.get_html_content_from_email(mail.outbox[4])
+ self._confirm_number_of_alerts(html_content, 1)
+ self._assert_child_hits_content(
+ html_content,
+ cross_object_alert.name,
+ alert_de.docket.case_name,
+ [rd_2.description],
+ )
+ # Assert email text version:
+ txt_email = mail.outbox[4].body
+ self.assertIn(cross_object_alert.name, txt_email)
+ self.assertIn(rd_2.description, txt_email)
+
+ docket.delete()
+
+ def test_special_cross_object_alerts_or_clause(self) -> None:
+ """This test confirms that hits are properly filtered out or included
+ in alerts for special cross-object alerts that can match a
+ Docket-only hit and/or a Docket + RDs simultaneously in the same hit.
+ These cases include queries that use an OR clause combining
+ Docket field + RD fields.
+ """
+
+ # The following test confirms that an alert with a query that can match
+ # a Docket or RECAPDocuments from different cases simultaneously is
+ # properly filtered.
+ cross_object_alert_d_or_rd_field = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.REAL_TIME,
+ name="Test Alert Cross-object query",
+ query=f"q=docket_id:{self.de.docket.pk} OR pacer_doc_id:{self.rd_2.pacer_doc_id}&type=r",
+ )
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ), time_machine.travel(self.mock_date, tick=False):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+
+ # A new alert should be triggered containing a Docket-only hit and a
+ # Docket with the nested RD matched.
+ self.assertEqual(
+ len(mail.outbox), 1, msg="Outgoing emails don't match."
+ )
+ html_content = self.get_html_content_from_email(mail.outbox[0])
+ self._confirm_number_of_alerts(html_content, 1)
+
+ # This hit should only display the Docket matched by its ID;
+ # no RECAPDocument should be matched.
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ cross_object_alert_d_or_rd_field.name,
+ 2,
+ self.de.docket.case_name,
+ 0,
+ )
+ # The second hit should display the rd_2 nested below its parent docket.
+ self._assert_child_hits_content(
+ html_content,
+ cross_object_alert_d_or_rd_field.name,
+ self.de_1.docket.case_name,
+ [self.rd_2.description],
+ )
+ # Assert email text version:
+ txt_email = mail.outbox[0].body
+ self.assertIn(cross_object_alert_d_or_rd_field.name, txt_email)
+ self.assertIn(self.rd_2.description, txt_email)
+
+ # This test confirms that we're able to trigger cross-object alerts
+ # that include an OR clause and match documents that belong to the
+ # same case.
+ cross_object_alert_d_or_rd_field_same_case = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.REAL_TIME,
+ name="Test Alert Cross-object query",
+ query=f"q=docket_id:{self.de.docket.pk} OR pacer_doc_id:{self.rd.pacer_doc_id}&type=r",
+ )
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ), time_machine.travel(self.mock_date, tick=False):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+
+ # A new alert should be triggered, containing the RD document nested
+ # below its parent docket.
+ self.assertEqual(
+ len(mail.outbox), 2, msg="Outgoing emails don't match."
+ )
+ html_content = self.get_html_content_from_email(mail.outbox[1])
+ self._confirm_number_of_alerts(html_content, 1)
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ cross_object_alert_d_or_rd_field_same_case.name,
+ 1,
+ self.de.docket.case_name,
+ 1,
+ )
+ self._assert_child_hits_content(
+ html_content,
+ cross_object_alert_d_or_rd_field_same_case.name,
+ self.de.docket.case_name,
+ [self.rd.description],
+ )
+
+ def test_special_cross_object_alerts_text_query(self) -> None:
+ """This test confirms that hits are properly filtered out or included
+ in alerts for special cross-object alerts that can match a Docket-only
+ hit and/or a Docket + RDs simultaneously in the same hit. These cases
+ include queries with a text query that can match Docket and RD fields
+ simultaneously.
+ """
+
+ # This test confirms a text query cross-object alert matches documents
+ # according to trigger conditions such as the indexed date and previous
+ # triggers by the same document.
+ cross_object_alert_text = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.REAL_TIME,
+ name="Test Alert Cross-object text query",
+ query=f'q="United states"&type=r',
+ )
+ two_days_before = self.mock_date - datetime.timedelta(days=2)
+ mock_two_days_before = two_days_before.replace(hour=5)
+ with time_machine.travel(
+ mock_two_days_before, tick=False
+ ), self.captureOnCommitCallbacks(execute=True):
+ docket = DocketFactory(
+ court=self.court,
+ case_name="United States of America",
+ docket_number="1:21-bk-1009",
+ source=Docket.RECAP,
+ )
+
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ), time_machine.travel(self.mock_date, tick=False):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+
+ # No alert should be triggered since the matched docket was not
+ # modified during the current day.
+ self.assertEqual(
+ len(mail.outbox), 0, msg="Outgoing emails don't match."
+ )
+
+ # Index new documents that match cross_object_alert_text: an RD and
+ # an empty docket.
+ with time_machine.travel(
+ self.mock_date, tick=False
+ ), self.captureOnCommitCallbacks(execute=True):
+ alert_de = DocketEntryWithParentsFactory(
+ docket=docket,
+ entry_number=1,
+ date_filed=datetime.date(2024, 8, 19),
+ description="MOTION for Leave to File",
+ )
+ rd_3 = RECAPDocumentFactory(
+ docket_entry=alert_de,
+ description="Motion to File New",
+ document_number="2",
+ pacer_doc_id="018036652875",
+ plain_text="United states Lorem",
+ )
+
+ docket_2 = DocketFactory(
+ court=self.court,
+ case_name="United States vs Lorem",
+ docket_number="1:21-bk-1008",
+ source=Docket.RECAP,
+ )
+
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ), time_machine.travel(self.mock_date, tick=False):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+
+ # An alert should be triggered containing two hits: one matched by the
+ # rd_3 plain text and one matched by the docket_2 case_name.
+ self.assertEqual(
+ len(mail.outbox), 1, msg="Outgoing emails don't match."
+ )
+ html_content = self.get_html_content_from_email(mail.outbox[0])
+ self._confirm_number_of_alerts(html_content, 1)
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ cross_object_alert_text.name,
+ 2,
+ docket.case_name,
+ 1,
+ )
+ # rd_3 should appear nested in this hit.
+ self._assert_child_hits_content(
+ html_content,
+ cross_object_alert_text.name,
+ docket.case_name,
+ [rd_3.description],
+ )
+ # The docket_2 hit shouldn't contain RDs.
+ self._assert_child_hits_content(
+ html_content,
+ cross_object_alert_text.name,
+ docket_2.case_name,
+ [],
+ )
+ # Modify 1:21-bk-1009 docket today:
+ with time_machine.travel(
+ self.mock_date, tick=False
+ ), self.captureOnCommitCallbacks(execute=True):
+ docket.cause = "405 Civil"
+ docket.save()
+
+ # Trigger the alert again:
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ), time_machine.travel(self.mock_date, tick=False):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+
+ # A new alert should be triggered containing the docket as a hit with
+ # no nested RDs.
+ html_content = self.get_html_content_from_email(mail.outbox[1])
+ self.assertEqual(
+ len(mail.outbox), 2, msg="Outgoing emails don't match."
+ )
+ self._confirm_number_of_alerts(html_content, 1)
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ cross_object_alert_text.name,
+ 1,
+ docket.case_name,
+ 0,
+ )
+ self._assert_child_hits_content(
+ html_content,
+ cross_object_alert_text.name,
+ docket.case_name,
+ [],
+ )
+
+ # Trigger alert again:
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ), time_machine.travel(self.mock_date, tick=False):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+ # No new alerts should be triggered.
+ self.assertEqual(
+ len(mail.outbox), 2, msg="Outgoing emails don't match."
+ )
+
+ # Index two additional RDs on the same docket entry; only rd_5 matches
+ # cross_object_alert_text.
+ with time_machine.travel(
+ self.mock_date, tick=False
+ ), self.captureOnCommitCallbacks(execute=True):
+ rd_4 = RECAPDocumentFactory(
+ docket_entry=alert_de,
+ description="Hearing new",
+ document_number="3",
+ pacer_doc_id="0180366528790",
+ plain_text="Lorem ipsum",
+ )
+ rd_5 = RECAPDocumentFactory(
+ docket_entry=alert_de,
+ description="Hearing new 2",
+ document_number="4",
+ pacer_doc_id="018026657750",
+ plain_text="United states of america plain text",
+ )
+
+ # This test confirms that we're able to trigger cross-object alerts
+ # that include an OR clause and a cross-object text query.
+ cross_object_alert_d_or_rd_field_text_query = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.REAL_TIME,
+ name="Test Alert Cross-object query combined.",
+ query=f"q=docket_id:{self.de.docket.pk} OR "
+ f"pacer_doc_id:{self.rd.pacer_doc_id} OR "
+ f'("United States of America" OR '
+ f"pacer_doc_id:{rd_3.pacer_doc_id})&type=r",
+ )
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ), time_machine.travel(self.mock_date, tick=False):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+
+ # A new alert should be triggered, containing the RD document nested below
+ # its parent docket.
+ html_content = self.get_html_content_from_email(mail.outbox[2])
+ self.assertEqual(
+ len(mail.outbox), 3, msg="Outgoing emails don't match."
+ )
+ # The email contains two alerts: one for cross_object_alert_text
+ # triggered by the newly added rd_5, and one for
+ # cross_object_alert_d_or_rd_field_text_query.
+ self._confirm_number_of_alerts(html_content, 2)
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ cross_object_alert_text.name,
+ 1,
+ docket.case_name,
+ 1,
+ )
+ # The cross_object_alert_d_or_rd_field_text_query alert contains two
+ # hits. The first matches "docket", with rd_3 and rd_5 nested below
+ # due to the OR clause in the text query; the second matches
+ # self.de.docket and self.rd.
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ cross_object_alert_d_or_rd_field_text_query.name,
+ 2,
+ docket.case_name,
+ 2,
+ )
+ self._assert_child_hits_content(
+ html_content,
+ cross_object_alert_d_or_rd_field_text_query.name,
+ docket.case_name,
+ [rd_3.description, rd_5.description],
+ )
+ self._assert_child_hits_content(
+ html_content,
+ cross_object_alert_d_or_rd_field_text_query.name,
+ self.de.docket.case_name,
+ [self.rd.description],
+ )
+
+ # This test confirms that hits are properly filtered when using AND in
+ # the text query.
+ cross_object_alert_d_or_rd_field_text_query_and = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.REAL_TIME,
+ name="Test Alert Cross-object query combined.",
+ query=f'q=("United States of America" AND '
+ f"pacer_doc_id:{rd_3.pacer_doc_id})&type=r",
+ )
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ), time_machine.travel(self.mock_date, tick=False):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+
+ # A new alert should be triggered, containing the rd_3 document nested
+ # below its parent docket.
+ html_content = self.get_html_content_from_email(mail.outbox[3])
+ self.assertEqual(
+ len(mail.outbox), 4, msg="Outgoing emails don't match."
+ )
+ self._confirm_number_of_alerts(html_content, 1)
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ cross_object_alert_d_or_rd_field_text_query_and.name,
+ 1,
+ docket.case_name,
+ 1,
+ )
+ self._assert_child_hits_content(
+ html_content,
+ cross_object_alert_d_or_rd_field_text_query_and.name,
+ docket.case_name,
+ [rd_3.description],
+ )
+
+ docket.delete()
+ docket_2.delete()
+
+ def test_limit_alert_case_child_hits(self) -> None:
+ """Test limit case child hits up to 5 and display the "View additional
+ results for this Case" button.
+ """
+
+ with time_machine.travel(self.mock_date, tick=False):
+ alert_de = DocketEntryWithParentsFactory(
+ docket=self.de.docket,
+ entry_number=1,
+ date_filed=datetime.date(2024, 8, 19),
+ description="MOTION for Leave to File Amicus Curiae Lorem Served",
+ )
+ rd_descriptions = []
+ for i in range(4):
+ rd = RECAPDocumentFactory(
+ docket_entry=alert_de,
+ description=f"Motion to File {i+1}",
+ document_number=f"{i+1}",
+ pacer_doc_id=f"018036652436{i+1}",
+ )
+ if i < 3:
+ # Omit the last RD from the comparison; only up to 3 should be
+ # displayed per case.
+ rd_descriptions.append(rd.description)
+
+ call_command(
+ "cl_index_parent_and_child_docs",
+ search_type=SEARCH_TYPES.RECAP,
+ queue="celery",
+ pk_offset=0,
+ testing_mode=True,
+ )
+ recap_only_alert = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.REAL_TIME,
+ name="Test Alert RECAP Only Docket Entry",
+ query=f"q=docket_entry_id:{alert_de.pk}&type=r",
+ )
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+
+ self.assertEqual(
+ len(mail.outbox), 1, msg="Outgoing emails don't match."
+ )
+ html_content = self.get_html_content_from_email(mail.outbox[0])
+ self.assertIn(recap_only_alert.name, html_content)
+ self._confirm_number_of_alerts(html_content, 1)
+ # The case alert should contain up to 3 child hits.
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ recap_only_alert.name,
+ 1,
+ self.de.docket.case_name,
+ 3,
+ )
+ self._assert_child_hits_content(
+ html_content,
+ recap_only_alert.name,
+ alert_de.docket.case_name,
+ rd_descriptions,
+ )
+ # Assert the "View Additional Results for this Case" button is present
+ # in the alert.
+ self.assertIn("View Additional Results for this Case", html_content)
+
+ # Assert email text version:
+ txt_email = mail.outbox[0].body
+ self.assertIn(recap_only_alert.name, txt_email)
+ for description in rd_descriptions:
+ with self.subTest(
+ description=description, msg="Plain text descriptions"
+ ):
+ self.assertIn(
+ description,
+ txt_email,
+ msg="RECAPDocument wasn't found in the email content.",
+ )
+
+ self.assertIn("View Additional Results for this Case", txt_email)
+
+ alert_de.delete()
+
+ @override_settings(SCHEDULED_ALERT_HITS_LIMIT=3)
+ def test_multiple_alerts_email_hits_limit_per_alert(self) -> None:
+ """Test multiple alerts can be grouped in an email and hits within an
+ alert are limited to SCHEDULED_ALERT_HITS_LIMIT (3) hits.
+ """
+
+ docket = DocketFactory(
+ court=self.court,
+ case_name=f"SUBPOENAS SERVED CASE",
+ docket_number=f"1:21-bk-123",
+ source=Docket.RECAP,
+ cause="410 Civil",
+ )
+ dockets_created = []
+ for i in range(3):
+ docket_created = DocketFactory(
+ court=self.court,
+ case_name=f"SUBPOENAS SERVED CASE {i}",
+ docket_number=f"1:21-bk-123{i}",
+ source=Docket.RECAP,
+ cause="410 Civil",
+ )
+ dockets_created.append(docket_created)
+
+ alert_de = DocketEntryWithParentsFactory(
+ docket=docket,
+ entry_number=1,
+ date_filed=datetime.date(2024, 8, 19),
+ description="MOTION for Leave to File Amicus Curiae Lorem Served",
+ )
+ rd = RECAPDocumentFactory(
+ docket_entry=alert_de,
+ description="Motion to File",
+ document_number="1",
+ pacer_doc_id="018036652439",
+ )
+ rd_2 = RECAPDocumentFactory(
+ docket_entry=alert_de,
+ description="Motion to File 2",
+ document_number="2",
+ pacer_doc_id="018036652440",
+ plain_text="plain text lorem",
+ )
+
+ docket_only_alert = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.REAL_TIME,
+ name="Test Alert Docket Only",
+ query='q="410 Civil"&type=r',
+ )
+ recap_only_alert = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.REAL_TIME,
+ name="Test Alert RECAP Only Docket Entry",
+ query=f"q=docket_entry_id:{alert_de.pk}&type=r",
+ )
+ cross_object_alert_with_hl = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.REAL_TIME,
+ name="Test Alert Cross-object",
+ query=f'q="File Amicus Curiae" AND "Motion to File 2" AND '
+ f'"plain text lorem" AND "410 Civil" AND '
+ f"id:{rd_2.pk}&docket_number={docket.docket_number}"
+ f'&case_name="{docket.case_name}"&type=r',
+ )
+ AlertFactory(
+ user=self.user_profile_2.user,
+ rate=Alert.REAL_TIME,
+ name="Test Alert Cross-object",
+ query=f'q="File Amicus Curiae" AND "Motion to File 2" AND '
+ f'"plain text lorem" AND "410 Civil" AND '
+ f"id:{rd_2.pk}&docket_number={docket.docket_number}"
+ f'&case_name="{docket.case_name}"&type=r',
+ )
+
+ call_command(
+ "cl_index_parent_and_child_docs",
+ search_type=SEARCH_TYPES.RECAP,
+ queue="celery",
+ pk_offset=0,
+ testing_mode=True,
+ )
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+
+ self.assertEqual(
+ len(mail.outbox), 2, msg="Outgoing emails don't match."
+ )
+
+ # Assert webhooks.
+ webhook_events = WebhookEvent.objects.all().values_list(
+ "content", flat=True
+ )
+ self.assertEqual(len(webhook_events), 3)
+
+ # Assert docket-only alert.
+ html_content = self.get_html_content_from_email(mail.outbox[0])
+ self.assertIn(docket_only_alert.name, html_content)
+ self._confirm_number_of_alerts(html_content, 3)
+ # The docket-only alert doesn't contain any nested child hits.
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ docket_only_alert.name,
+ 3,
+ docket.case_name,
+ 0,
+ )
+ self._count_webhook_hits_and_child_hits(
+ list(webhook_events),
+ docket_only_alert.name,
+ 3,
+ docket.case_name,
+ 0,
+ )
+
+ # Assert RECAP-only alert.
+ self.assertIn(recap_only_alert.name, html_content)
+ # The recap-only alert contains 2 child hits.
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ recap_only_alert.name,
+ 1,
+ alert_de.docket.case_name,
+ 2,
+ )
+ self._count_webhook_hits_and_child_hits(
+ list(webhook_events),
+ recap_only_alert.name,
+ 1,
+ alert_de.docket.case_name,
+ 2,
+ )
+ self._assert_child_hits_content(
+ html_content,
+ recap_only_alert.name,
+ alert_de.docket.case_name,
+ [rd.description, rd_2.description],
+ )
+
+ # Assert Cross-object alert.
+ self.assertIn(cross_object_alert_with_hl.name, html_content)
+ # The cross-object alert contains only 1 child hit.
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ cross_object_alert_with_hl.name,
+ 1,
+ alert_de.docket.case_name,
+ 1,
+ )
+ self._count_webhook_hits_and_child_hits(
+ list(webhook_events),
+ cross_object_alert_with_hl.name,
+ 1,
+ alert_de.docket.case_name,
+ 1,
+ )
+ self._assert_child_hits_content(
+ html_content,
+ cross_object_alert_with_hl.name,
+ alert_de.docket.case_name,
+ [rd_2.description],
+ )
+
+ # Assert HL in the cross_object_alert_with_hl.
+ self.assertIn(f"<strong>{docket.case_name}</strong>", html_content)
+ self.assertEqual(
+ html_content.count(f"<strong>{docket.case_name}</strong>"), 1
+ )
+ self.assertIn(f"<strong>{docket.docket_number}</strong>", html_content)
+ self.assertEqual(
+ html_content.count(f"<strong>{docket.docket_number}</strong>"), 1
+ )
+ self.assertIn(f"<strong>{rd_2.plain_text}</strong>", html_content)
+ self.assertEqual(
+ html_content.count(f"<strong>{rd_2.plain_text}</strong>"), 1
+ )
+ self.assertIn(f"<strong>{rd_2.description}</strong>", html_content)
+ self.assertEqual(
+ html_content.count(f"<strong>{rd_2.description}</strong>"), 1
+ )
+ self.assertIn("<strong>File Amicus Curiae</strong>", html_content)
+ self.assertEqual(
+ html_content.count("<strong>File Amicus Curiae</strong>"), 1
+ )
+
+ # Assert email text version:
+ txt_email = mail.outbox[0].body
+ self.assertIn(recap_only_alert.name, txt_email)
+ self.assertIn(docket_only_alert.name, txt_email)
+ self.assertIn(cross_object_alert_with_hl.name, txt_email)
+ for description in [rd.description, rd_2.description]:
+ with self.subTest(
+ description=description, msg="Plain text descriptions"
+ ):
+ self.assertIn(
+ description,
+ txt_email,
+ msg="RECAPDocument wasn't found in the email content.",
+ )
+
+ docket.delete()
+ for d in dockets_created:
+ d.delete()
+
+ def test_schedule_wly_and_mly_recap_alerts(self) -> None:
+ """Test Weekly and Monthly RECAP Search Alerts are scheduled daily
+ before being sent later.
+ """
+
+ docket_only_alert = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.WEEKLY,
+ name="Test Alert Docket Only",
+ query='q="401 Civil"&type=r',
+ )
+ recap_only_alert = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.MONTHLY,
+ name="Test Alert RECAP Only Docket Entry",
+ query=f"q=docket_entry_id:{self.de.pk}&type=r",
+ )
+ cross_object_alert_with_hl = AlertFactory(
+ user=self.user_profile.user,
+ rate=Alert.WEEKLY,
+ name="Test Alert Cross-object",
+ query=f'q="401 Civil" id:{self.rd.pk}&type=r',
+ )
+ with mock.patch(
+ "cl.api.webhooks.requests.post",
+ side_effect=lambda *args, **kwargs: MockResponse(
+ 200, mock_raw=True
+ ),
+ ):
+ call_command("cl_send_recap_alerts", testing_mode=True)
+
+ # Weekly and monthly alerts are not sent right away but are scheduled as
+ # ScheduledAlertHit to be sent by the cl_send_scheduled_alerts command.
+ self.assertEqual(
+ len(mail.outbox), 0, msg="Outgoing emails don't match."
+ )
+ schedule_alerts = ScheduledAlertHit.objects.all()
+ self.assertEqual(schedule_alerts.count(), 3)
+
+ # Webhooks are sent immediately as hits are matched.
+ webhook_events = WebhookEvent.objects.all().values_list(
+ "content", flat=True
+ )
+ self.assertEqual(len(webhook_events), 3)
+
+ # Send scheduled Weekly alerts and check assertions.
+ call_command("cl_send_scheduled_alerts", rate=Alert.WEEKLY)
+ self.assertEqual(
+ len(mail.outbox), 1, msg="Outgoing emails don't match."
+ )
+ # Assert docket-only alert.
+ html_content = self.get_html_content_from_email(mail.outbox[0])
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ docket_only_alert.name,
+ 1,
+ self.de.docket.case_name,
+ 0,
+ )
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ cross_object_alert_with_hl.name,
+ 1,
+ self.de.docket.case_name,
+ 1,
+ )
+ self._assert_child_hits_content(
+ html_content,
+ cross_object_alert_with_hl.name,
+ self.de.docket.case_name,
+ [self.rd.description],
+ )
+ # Assert email text version:
+ txt_email = mail.outbox[0].body
+ self.assertIn(docket_only_alert.name, txt_email)
+ self.assertIn(cross_object_alert_with_hl.name, txt_email)
+ self.assertIn(self.rd.description, txt_email)
+
+ # Send scheduled Monthly alerts and check assertions.
+ current_date = now().replace(day=28, hour=0)
+ with time_machine.travel(current_date, tick=False):
+ call_command("cl_send_scheduled_alerts", rate=Alert.MONTHLY)
+ self.assertEqual(
+ len(mail.outbox), 2, msg="Outgoing emails don't match."
+ )
+ html_content = self.get_html_content_from_email(mail.outbox[1])
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ recap_only_alert.name,
+ 1,
+ self.de.docket.case_name,
+ 2,
+ )
+ self._assert_child_hits_content(
+ html_content,
+ recap_only_alert.name,
+ self.de.docket.case_name,
+ [self.rd.description, self.rd_att.description],
+ )
+ # Assert email text version:
+ txt_email = mail.outbox[1].body
+ self.assertIn(recap_only_alert.name, txt_email)
+ self.assertIn(self.rd.description, txt_email)
+ self.assertIn(self.rd_att.description, txt_email)
+
+ def test_alert_frequency_estimation(self):
+ """Test alert frequency ES API endpoint for RECAP Alerts."""
+
+ search_params = {
+ "type": SEARCH_TYPES.RECAP,
+ "q": "Frequency Test RECAP",
+ }
+ r = self.client.get(
+ reverse(
+ "alert_frequency", kwargs={"version": "4", "day_count": "100"}
+ ),
+ search_params,
+ )
+ self.assertEqual(r.json()["count"], 0)
+ with time_machine.travel(
+ self.mock_date, tick=False
+ ), self.captureOnCommitCallbacks(execute=True):
+ # Docket filed today.
+ docket = DocketFactory(
+ court=self.court,
+ case_name="Frequency Test RECAP",
+ docket_number="1:21-bk-1240",
+ source=Docket.RECAP,
+ date_filed=now().date(),
+ )
+
+ # RECAPDocument filed today that belongs to a docket filed outside
+ # the estimation range.
+ date_outside_range = now() - datetime.timedelta(days=102)
+ alert_de = DocketEntryWithParentsFactory(
+ docket=DocketFactory(
+ court=self.court,
+ case_name="Frequency Test RECAP",
+ docket_number="1:21-bk-1245",
+ source=Docket.RECAP,
+ date_filed=date_outside_range.date(),
+ ),
+ entry_number=1,
+ date_filed=now().date(),
+ )
+ RECAPDocumentFactory(
+ docket_entry=alert_de,
+ description="Frequency Test RECAP",
+ document_number="1",
+ pacer_doc_id="018036652450",
+ )
+
+ r = self.client.get(
+ reverse(
+ "alert_frequency", kwargs={"version": "4", "day_count": "100"}
+ ),
+ search_params,
+ )
+ # 2 expected hits in the last 100 days. One docket filed today + one
+ # RECAPDocument filed today.
+ self.assertEqual(r.json()["count"], 2)
+
+ docket.delete()
+ alert_de.docket.delete()
diff --git a/cl/alerts/utils.py b/cl/alerts/utils.py
index d287b3627a..4b97fc7ba8 100644
--- a/cl/alerts/utils.py
+++ b/cl/alerts/utils.py
@@ -4,7 +4,8 @@
from django.conf import settings
from django.http import QueryDict
from elasticsearch_dsl import Q, Search
-from elasticsearch_dsl.response import Response
+from elasticsearch_dsl.response import Hit, Response
+from redis import Redis
from cl.alerts.models import (
SCHEDULED_ALERT_HIT_STATUS,
@@ -16,7 +17,6 @@
from cl.lib.elasticsearch_utils import add_es_highlighting
from cl.search.documents import AudioPercolator
from cl.search.models import SEARCH_TYPES, Docket
-from cl.users.models import UserProfile
@dataclass
@@ -25,6 +25,14 @@ class DocketAlertReportObject:
docket: Docket
+@dataclass
+class TaskCompletionStatus:
+ completed: bool = False
+ created: int = 0
+ total: int = 0
+ start_time_millis: int | None = None
+
+
class OldAlertReport:
def __init__(self):
self.old_alerts = []
@@ -138,3 +146,44 @@ def alert_hits_limit_reached(alert_pk: int, user_pk: int) -> bool:
)
return True
return False
+
+
+def make_alert_set_key(alert_id: int, document_type: str) -> str:
+ """Generate a Redis key for storing alert hits.
+
+ :param alert_id: The ID of the alert.
+ :param document_type: The type of document associated with the alert.
+ :return: A Redis key string in the format "alert_hits:{alert_id}.{document_type}".
+ """
+ return f"alert_hits:{alert_id}.{document_type}"
+
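+# Example (illustrative sketch) of the key format produced above; the "r"
+# document-type tag is an arbitrary sample value:
+#   >>> make_alert_set_key(55, "r")
+#   'alert_hits:55.r'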
+
+def add_document_hit_to_alert_set(
+ r: Redis, alert_id: int, document_type: str, document_id: int
+) -> None:
+ """Add a document ID to the Redis SET associated with an alert ID.
+
+ :param r: Redis client instance.
+ :param alert_id: The alert identifier.
+ :param document_type: The type of document associated with the alert.
+ :param document_id: The document identifier to add.
+ :return: None
+ """
+ alert_key = make_alert_set_key(alert_id, document_type)
+ r.sadd(alert_key, document_id)
+
+
+def has_document_alert_hit_been_triggered(
+ r: Redis, alert_id: int, document_type: str, document_id: int
+) -> bool:
+ """Check if a document ID is a member of the Redis SET associated with an
+ alert ID.
+
+ :param r: Redis client instance.
+ :param alert_id: The alert identifier.
+ :param document_type: The type of document associated with the alert.
+ :param document_id: The document identifier to check.
+ :return: True if the document ID is a member of the set, False otherwise.
+ """
+ alert_key = make_alert_set_key(alert_id, document_type)
+ return r.sismember(alert_key, document_id)
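+
+
+# Example (illustrative sketch): combining the helpers above to deduplicate
+# alert hits across runs. get_redis_interface("CACHE") is an assumption for
+# obtaining a Redis client, and the "d" document-type tag is illustrative.
+#
+#   from cl.lib.redis_utils import get_redis_interface
+#
+#   r = get_redis_interface("CACHE")
+#   if not has_document_alert_hit_been_triggered(r, alert.pk, "d", docket.pk):
+#       add_document_hit_to_alert_set(r, alert.pk, "d", docket.pk)
+#       # ...then send the alert for this docket...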
diff --git a/cl/api/tasks.py b/cl/api/tasks.py
index b70420fe95..7f0b8d2cdd 100644
--- a/cl/api/tasks.py
+++ b/cl/api/tasks.py
@@ -1,6 +1,8 @@
import json
+from collections import defaultdict
from typing import Any
+from elasticsearch_dsl.response import Hit
from rest_framework.renderers import JSONRenderer
from cl.alerts.api_serializers import SearchAlertSerializerModel
@@ -10,9 +12,12 @@
from cl.api.webhooks import send_webhook_event
from cl.celery_init import app
from cl.corpus_importer.api_serializers import DocketEntrySerializer
-from cl.search.api_serializers import V3OAESResultSerializer
+from cl.search.api_serializers import (
+ RECAPESResultSerializer,
+ V3OAESResultSerializer,
+)
from cl.search.api_utils import ResultObject
-from cl.search.models import DocketEntry
+from cl.search.models import SEARCH_TYPES, DocketEntry
@app.task()
@@ -77,6 +82,7 @@ def send_docket_alert_webhook_events(
send_webhook_event(webhook_event, json_bytes)
+# TODO: Remove after scheduled OA alerts have been processed.
@app.task()
def send_es_search_alert_webhook(
results: list[dict[str, Any]],
@@ -116,3 +122,66 @@ def send_es_search_alert_webhook(
content=post_content,
)
send_webhook_event(webhook_event, json_bytes)
+
+
+@app.task()
+def send_search_alert_webhook_es(
+ results: list[dict[str, Any]] | list[Hit],
+ webhook_pk: int,
+ alert_pk: int,
+) -> None:
+ """Send a search alert webhook event containing search results from a
+ search alert object.
+
+ :param results: The search results returned by ES for this alert.
+ :param webhook_pk: The webhook endpoint ID object to send the event to.
+ :param alert_pk: The search alert ID.
+ """
+
+ webhook = Webhook.objects.get(pk=webhook_pk)
+ alert = Alert.objects.get(pk=alert_pk)
+ serialized_alert = SearchAlertSerializerModel(alert).data
+ match alert.alert_type:
+ case SEARCH_TYPES.ORAL_ARGUMENT:
+ es_results = []
+ for result in results:
+ result["snippet"] = result["text"]
+ es_results.append(ResultObject(initial=result))
+ serialized_results = V3OAESResultSerializer(
+ es_results, many=True
+ ).data
+ case SEARCH_TYPES.RECAP:
+ for result in results:
+ child_result_objects = []
+ if hasattr(result, "child_docs"):
+ for child_doc in result.child_docs:
+ child_result_objects.append(
+ defaultdict(
+ lambda: None, child_doc["_source"].to_dict()
+ )
+ )
+ result["child_docs"] = child_result_objects
+ serialized_results = RECAPESResultSerializer(
+ results, many=True
+ ).data
+ case _:
+ # Not an implemented alert type; do nothing.
+ return None
+
+ post_content = {
+ "webhook": generate_webhook_key_content(webhook),
+ "payload": {
+ "results": serialized_results,
+ "alert": serialized_alert,
+ },
+ }
+ renderer = JSONRenderer()
+ json_bytes = renderer.render(
+ post_content,
+ accepted_media_type="application/json;",
+ )
+ webhook_event = WebhookEvent.objects.create(
+ webhook=webhook,
+ content=post_content,
+ )
+ send_webhook_event(webhook_event, json_bytes)
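+
+
+# Example (illustrative sketch): enqueuing this task for a matched alert,
+# where `results` is the list of ES Hits for the alert; the variable names
+# are assumptions.
+#
+#   send_search_alert_webhook_es.delay(results, webhook.pk, alert.pk)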
diff --git a/cl/api/urls.py b/cl/api/urls.py
index e407d4b94a..c8d1c6a7ea 100644
--- a/cl/api/urls.py
+++ b/cl/api/urls.py
@@ -322,7 +322,7 @@
name="coverage_data_opinions",
),
re_path(
- r"^api/rest/v(?P[123])/alert-frequency/(?P\d+)/$",
+ r"^api/rest/v(?P[1234])/alert-frequency/(?P\d+)/$",
views.get_result_count,
name="alert_frequency",
),
diff --git a/cl/api/views.py b/cl/api/views.py
index e0b6987654..29aa0c40b6 100644
--- a/cl/api/views.py
+++ b/cl/api/views.py
@@ -21,7 +21,11 @@
build_coverage_query,
get_solr_interface,
)
-from cl.search.documents import AudioDocument, OpinionClusterDocument
+from cl.search.documents import (
+ AudioDocument,
+ DocketDocument,
+ OpinionClusterDocument,
+)
from cl.search.forms import SearchForm
from cl.search.models import SEARCH_TYPES, Citation, Court, OpinionCluster
from cl.simple_pages.coverage_utils import build_chart_data
@@ -278,7 +282,10 @@ async def get_result_count(request, version, day_count):
es_flag_for_o = await sync_to_async(waffle.flag_is_active)(
request, "o-es-active"
)
- is_es_form = es_flag_for_oa or es_flag_for_o
+ es_flag_for_r = await sync_to_async(waffle.flag_is_active)(
+ request, "recap-alerts-active"
+ )
+ is_es_form = es_flag_for_oa or es_flag_for_o or es_flag_for_r
search_form = await sync_to_async(SearchForm)(
request.GET.copy(), is_es_form=is_es_form
)
@@ -303,6 +310,12 @@ async def get_result_count(request, version, day_count):
total_query_results = await sync_to_async(
do_es_alert_estimation_query
)(search_query, cd, day_count)
+ case SEARCH_TYPES.RECAP if es_flag_for_r:
+ # Elasticsearch version for RECAP
+ search_query = DocketDocument.search()
+ total_query_results = await sync_to_async(
+ do_es_alert_estimation_query
+ )(search_query, cd, day_count)
case _:
@sync_to_async
diff --git a/cl/corpus_importer/management/commands/ready_mix_cases_project.py b/cl/corpus_importer/management/commands/ready_mix_cases_project.py
index eabb93f4a4..32c9db7ae3 100644
--- a/cl/corpus_importer/management/commands/ready_mix_cases_project.py
+++ b/cl/corpus_importer/management/commands/ready_mix_cases_project.py
@@ -198,7 +198,8 @@ def query_results_in_es(options):
}
search_query = DocketDocument.search()
- s, _ = build_es_base_query(search_query, cd)
+ es_queries = build_es_base_query(search_query, cd)
+ s = es_queries.search_query
s = s.extra(size=options["results_size"])
response = s.execute().to_dict()
extracted_data = [
diff --git a/cl/custom_filters/templatetags/extras.py b/cl/custom_filters/templatetags/extras.py
index a63191495a..39d535b2df 100644
--- a/cl/custom_filters/templatetags/extras.py
+++ b/cl/custom_filters/templatetags/extras.py
@@ -1,10 +1,13 @@
import random
import re
+import urllib.parse
from datetime import datetime
+import waffle
from django import template
from django.core.exceptions import ValidationError
from django.template import Context
+from django.template.context import RequestContext
from django.template.defaultfilters import date as date_filter
from django.utils.formats import date_format
from django.utils.html import format_html
@@ -12,7 +15,8 @@
from django.utils.safestring import SafeString, mark_safe
from elasticsearch_dsl import AttrDict, AttrList
-from cl.search.models import Court, Docket, DocketEntry
+from cl.search.constants import ALERTS_HL_TAG, SEARCH_HL_TAG
+from cl.search.models import SEARCH_TYPES, Court, Docket, DocketEntry
register = template.Library()
@@ -130,11 +134,25 @@ def random_int(a: int, b: int) -> int:
@register.filter
-def get_attrdict(mapping, key):
- """Emulates the dictionary get for AttrDict objects. Useful when keys
- have spaces or other punctuation."""
+def get_es_doc_content(
+ mapping: AttrDict | dict, scheduled_alert: bool = False
+) -> AttrDict | dict | str:
+ """
+ Returns the ES document content placed in the "_source" field if the
+ document is an AttrDict, or returns the content as-is when there is no
+ need to extract it from "_source", such as in scheduled alerts where
+ the content is already a dict.
+
+ :param mapping: The AttrDict or dict instance to extract the content from.
+ :param scheduled_alert: A boolean indicating if the content belongs to a
+ scheduled alert where the content is already in place.
+ :return: The ES document content.
+ """
+
+ if scheduled_alert:
+ return mapping
try:
- return mapping[key]
+ return mapping["_source"]
except KeyError:
return ""
@@ -200,13 +218,15 @@ def citation(obj) -> SafeString:
@register.simple_tag
-def contains_highlights(content: str) -> bool:
+def contains_highlights(content: str, alert: bool = False) -> bool:
"""Check if a given string contains the mark tag used in highlights.
:param content: The input string to check.
+ :param alert: Whether this tag is being used in the alert template.
:return: True if the mark highlight tag is found, otherwise False.
"""
- pattern = r".*?"
+ hl_tag = ALERTS_HL_TAG if alert else SEARCH_HL_TAG
+ pattern = rf"<{hl_tag}>.*?{hl_tag}>"
matches = re.findall(pattern, content)
return bool(matches)
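+
+# Example (illustrative sketch), assuming SEARCH_HL_TAG is "mark":
+#   >>> contains_highlights("Lorem <mark>ipsum</mark>")
+#   True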
@@ -247,6 +267,35 @@ def get_highlight(result: AttrDict | dict[str, any], field: str) -> any:
return render_string_or_list(hl_value) if hl_value else original_value
+@register.simple_tag
+def extract_q_value(query: str) -> str:
+ """Extract the value of the "q" parameter from a URL-encoded query string.
+
+ :param query: The URL-encoded query string.
+ :return: The value of the "q" parameter or an empty string if "q" is not found.
+ """
+
+ parsed_query = urllib.parse.parse_qs(query)
+ return parsed_query.get("q", [""])[0]
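+
+# Example (illustrative sketch):
+#   >>> extract_q_value("q=docket_id%3A123&type=r")
+#   'docket_id:123'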
+
+
+@register.simple_tag(takes_context=True)
+def alerts_supported(context: RequestContext, search_type: str) -> bool:
+ """Determine if search alerts are supported based on the search type and flag
+ status.
+
+ :param context: The template context, which includes the request, required
+ for the waffle flag.
+ :param search_type: The type of search being performed.
+ :return: True if alerts are supported, False otherwise.
+ """
+
+ request = context["request"]
+ if search_type == SEARCH_TYPES.RECAP:
+ return waffle.flag_is_active(request, "recap-alerts-active")
+ return search_type in (SEARCH_TYPES.OPINION, SEARCH_TYPES.ORAL_ARGUMENT)
+
+
@register.filter
def group_courts(courts: list[Court], num_columns: int) -> list:
"""Divide courts in equal groupings while keeping related courts together
diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py
index 835bd52c36..c12b9c0058 100644
--- a/cl/lib/elasticsearch_utils.py
+++ b/cl/lib/elasticsearch_utils.py
@@ -35,6 +35,7 @@
ApiPositionMapping,
BasePositionMapping,
CleanData,
+ EsMainQueries,
ESRangeQueryParams,
)
from cl.lib.utils import (
@@ -132,8 +133,8 @@ def build_numeric_range_query(
def build_daterange_query(
field: str,
- before: datetime.date,
- after: datetime.date,
+ before: datetime.date | str,
+ after: datetime.date | str,
relation: Literal["INTERSECTS", "CONTAINS", "WITHIN", None] = None,
) -> list[Range]:
"""Given field name and date range limits returns ElasticSearch range query or None
@@ -148,9 +149,9 @@ def build_daterange_query(
params = {}
if any([before, after]):
- if hasattr(after, "strftime"):
+ if isinstance(after, datetime.date):
params["gte"] = f"{after.isoformat()}T00:00:00Z"
- if hasattr(before, "strftime"):
+ if isinstance(before, datetime.date):
params["lte"] = f"{before.isoformat()}T23:59:59Z"
if relation is not None:
allowed_relations = ["INTERSECTS", "CONTAINS", "WITHIN"]
@@ -931,6 +932,7 @@ def build_has_child_query(
order_by: tuple[str, str] | None = None,
child_highlighting: bool = True,
default_current_date: datetime.date | None = None,
+ alerts: bool = False,
) -> QueryString:
"""Build a 'has_child' query.
@@ -943,6 +945,7 @@ def build_has_child_query(
:param child_highlighting: Whether highlighting should be enabled in child docs.
:param default_current_date: The default current date to use for computing
a stable date score across pagination in the V4 Search API.
+ :param alerts: If highlighting is being applied to search Alerts hits.
:return: The 'has_child' query.
"""
@@ -959,8 +962,9 @@ def build_has_child_query(
default_current_date=default_current_date,
)
+ hl_tag = ALERTS_HL_TAG if alerts else SEARCH_HL_TAG
highlight_options, fields_to_exclude = build_highlights_dict(
- highlighting_fields, SEARCH_HL_TAG, child_highlighting
+ highlighting_fields, hl_tag, child_highlighting
)
inner_hits = {
@@ -1110,22 +1114,27 @@ def build_es_base_query(
cd: CleanData,
child_highlighting: bool = True,
api_version: Literal["v3", "v4"] | None = None,
-) -> tuple[Search, QueryString | None]:
+ alerts: bool = False,
+) -> EsMainQueries:
"""Builds filters and fulltext_query based on the given cleaned
data and returns an elasticsearch query.
:param search_query: The Elasticsearch search query object.
:param cd: The cleaned data object containing the query and filters.
- :param child_highlighting: Whether highlighting should be enabled in child docs.
+ :param child_highlighting: Whether highlighting should be enabled in child
+ docs.
:param api_version: Optional, the request API version.
- :return: A two-tuple, the Elasticsearch search query object and an ES
- QueryString for child documents, or None if there is no need to query
- child documents.
+ :param alerts: If highlighting is being applied to search Alerts hits.
+ :return: An `EsMainQueries` object containing the Elasticsearch search
+ query object, a QueryString for child documents (or None if there is no
+ need to query child documents), and a QueryString for parent documents
+ (or None).
"""
main_query = None
string_query = None
- join_query = None
+ child_docs_query = None
+ parent_query = None
filters = []
plain_doc = False
match cd["type"]:
@@ -1170,12 +1179,14 @@ def build_es_base_query(
],
)
)
- main_query, join_query = build_full_join_es_queries(
- cd,
- child_query_fields,
- parent_query_fields,
- child_highlighting=child_highlighting,
- api_version=api_version,
+ main_query, child_docs_query, parent_query = (
+ build_full_join_es_queries(
+ cd,
+ child_query_fields,
+ parent_query_fields,
+ child_highlighting=child_highlighting,
+ api_version=api_version,
+ )
)
case (
@@ -1206,13 +1217,17 @@ def build_es_base_query(
],
)
)
- main_query, join_query = build_full_join_es_queries(
- cd,
- child_query_fields,
- parent_query_fields,
- child_highlighting=child_highlighting,
- api_version=api_version,
+ main_query, child_docs_query, parent_query = (
+ build_full_join_es_queries(
+ cd,
+ child_query_fields,
+ parent_query_fields,
+ child_highlighting=child_highlighting,
+ api_version=api_version,
+ alerts=alerts,
+ )
)
+
case SEARCH_TYPES.OPINION:
str_query = cd.get("q", "")
related_match = RELATED_PATTERN.search(str_query)
@@ -1222,15 +1237,21 @@ def build_es_base_query(
mlt_query = async_to_sync(build_more_like_this_query)(
cluster_pks
)
- main_query, join_query = build_full_join_es_queries(
- cd,
- {"opinion": []},
- [],
- mlt_query,
- child_highlighting=False,
- api_version=api_version,
+ main_query, child_docs_query, parent_query = (
+ build_full_join_es_queries(
+ cd,
+ {"opinion": []},
+ [],
+ mlt_query,
+ child_highlighting=False,
+ api_version=api_version,
+ )
+ )
+ return EsMainQueries(
+ search_query=search_query.query(main_query),
+ parent_query=parent_query,
+ child_query=child_docs_query,
)
- return search_query.query(main_query), join_query
opinion_search_fields = SEARCH_OPINION_QUERY_FIELDS
child_fields = opinion_search_fields.copy()
@@ -1256,13 +1277,15 @@ def build_es_base_query(
],
)
)
- main_query, join_query = build_full_join_es_queries(
- cd,
- child_query_fields,
- parent_query_fields,
- mlt_query,
- child_highlighting=child_highlighting,
- api_version=api_version,
+ main_query, child_docs_query, parent_query = (
+ build_full_join_es_queries(
+ cd,
+ child_query_fields,
+ parent_query_fields,
+ mlt_query,
+ child_highlighting=child_highlighting,
+ api_version=api_version,
+ )
)
if not any([filters, string_query, main_query]):
@@ -1271,7 +1294,11 @@ def build_es_base_query(
match_all_query = get_match_all_query(
cd, search_query, api_version, child_highlighting
)
- return match_all_query, join_query
+ return EsMainQueries(
+ search_query=match_all_query,
+ parent_query=parent_query,
+ child_query=child_docs_query,
+ )
if plain_doc:
# Combine the filters and string query for plain documents like Oral
@@ -1280,7 +1307,11 @@ def build_es_base_query(
cd, filters, string_query, api_version
)
- return search_query.query(main_query), join_query
+ return EsMainQueries(
+ search_query=search_query.query(main_query),
+ parent_query=parent_query,
+ child_query=child_docs_query,
+ )
def build_has_parent_parties_query(
@@ -1310,7 +1341,7 @@ def build_has_parent_parties_query(
def build_child_docs_query(
- join_query: QueryString | None,
+ child_docs_query: QueryString | None,
cd: CleanData,
exclude_docs_for_empty_field: str = "",
) -> QueryString:
@@ -1320,7 +1351,7 @@ def build_child_docs_query(
to retrieve child documents directly, such as in the Opinions Feed,
RECAP Feed, RECAP Documents count query, and V4 RECAP_DOCUMENT Search API.
- :param join_query: Existing Elasticsearch QueryString object or None
+ :param child_docs_query: Existing Elasticsearch QueryString object or None
:param cd: The user input CleanedData
:param exclude_docs_for_empty_field: Field that should not be empty for a
document to be included
@@ -1338,7 +1369,7 @@ def build_child_docs_query(
]
parties_has_parent_query = build_has_parent_parties_query(parties_filters)
- if not join_query:
+ if not child_docs_query:
# Match all query case.
if not exclude_docs_for_empty_field:
if cd["type"] == SEARCH_TYPES.OPINION:
@@ -1360,7 +1391,7 @@ def build_child_docs_query(
filters.append(child_query_recap)
return Q("bool", filter=filters)
- query_dict = join_query.to_dict()
+ query_dict = child_docs_query.to_dict()
if "filter" in query_dict["bool"]:
existing_filter = query_dict["bool"]["filter"]
if cd["type"] == SEARCH_TYPES.OPINION:
@@ -1422,7 +1453,8 @@ def get_facet_dict_for_search_query(
"""
cd["just_facets_query"] = True
- search_query, _ = build_es_base_query(search_query, cd)
+ es_queries = build_es_base_query(search_query, cd)
+ search_query = es_queries.search_query
search_query.aggs.bucket("status", A("terms", field="status.raw"))
search_query = search_query.extra(size=0)
response = search_query.execute()
@@ -1444,7 +1476,9 @@ def build_es_main_query(
applicable.
"""
search_query_base = search_query
- search_query, join_query = build_es_base_query(search_query, cd)
+ es_queries = build_es_base_query(search_query, cd)
+ search_query = es_queries.search_query
+ child_docs_query = es_queries.child_query
top_hits_limit = 5
child_docs_count_query = None
match cd["type"]:
@@ -1462,7 +1496,9 @@ def build_es_main_query(
top_hits_limit,
)
case SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS:
- child_docs_count_query = build_child_docs_query(join_query, cd)
+ child_docs_count_query = build_child_docs_query(
+ child_docs_query, cd
+ )
if child_docs_count_query:
# Get the total RECAP Documents count.
child_docs_count_query = search_query_base.query(
@@ -2145,7 +2181,7 @@ def fetch_es_results(
return [], 0, error, None, None
-def build_has_child_filters(cd: CleanData) -> list[QueryString]:
+def build_has_child_filters(cd: CleanData) -> list[QueryString | Range]:
"""Builds Elasticsearch 'has_child' filters based on the given child type
and CleanData.
@@ -2181,6 +2217,8 @@ def build_has_child_filters(cd: CleanData) -> list[QueryString]:
description = cd.get("description", "")
document_number = cd.get("document_number", "")
attachment_number = cd.get("attachment_number", "")
+ entry_date_filed_after = cd.get("entry_date_filed_after", "")
+ entry_date_filed_before = cd.get("entry_date_filed_before", "")
if available_only:
queries_list.extend(
@@ -2199,6 +2237,14 @@ def build_has_child_filters(cd: CleanData) -> list[QueryString]:
queries_list.extend(
build_term_query("attachment_number", attachment_number)
)
+ if entry_date_filed_after or entry_date_filed_before:
+ queries_list.extend(
+ build_daterange_query(
+ "entry_date_filed",
+ entry_date_filed_before,
+ entry_date_filed_after,
+ )
+ )
return queries_list
@@ -2358,13 +2404,15 @@ def build_search_feed_query(
hl_field = "text"
if cd["type"] == SEARCH_TYPES.RECAP:
hl_field = "plain_text"
- s, join_query = build_es_base_query(search_query, cd)
+ es_queries = build_es_base_query(search_query, cd)
+ s = es_queries.search_query
+ child_docs_query = es_queries.child_query
if jurisdiction or cd["type"] == SEARCH_TYPES.RECAP:
# An Opinion Jurisdiction feed or RECAP Search displays child documents
# Eliminate items that lack the ordering field and apply highlighting
# to create a snippet for the plain_text or text fields.
s = build_child_docs_query(
- join_query,
+ child_docs_query,
cd=cd,
exclude_docs_for_empty_field=exclude_docs_for_empty_field,
)
@@ -2479,7 +2527,8 @@ def build_full_join_es_queries(
mlt_query: Query | None = None,
child_highlighting: bool = True,
api_version: Literal["v3", "v4"] | None = None,
-) -> tuple[QueryString | list, QueryString | None]:
+ alerts: bool = False,
+) -> tuple[QueryString | list, QueryString | None, QueryString | None]:
"""Build a complete Elasticsearch query with both parent and child document
conditions.
@@ -2489,7 +2538,9 @@ def build_full_join_es_queries(
:param mlt_query: the More Like This Query object.
:param child_highlighting: Whether highlighting should be enabled in child docs.
:param api_version: Optional, the request API version.
- :return: An Elasticsearch QueryString object.
+ :param alerts: If highlighting is being applied to search Alerts hits.
+ :return: A three-tuple: the main join query, the child documents query, and
+ the parent documents query.
"""
q_should = []
@@ -2505,7 +2556,8 @@ def build_full_join_es_queries(
case SEARCH_TYPES.PEOPLE:
child_type = "position"
- join_query = None
+ child_docs_query = None
+ parent_query = None
if cd["type"] in [
SEARCH_TYPES.RECAP,
SEARCH_TYPES.DOCKETS,
@@ -2548,7 +2600,7 @@ def build_full_join_es_queries(
query
for query in parent_filters
if not isinstance(query, QueryString)
- or query.fields[0] not in ["party", "attorney"]
+ or query.fields[0] not in ["party", "attorney", "firm"]
]
)
if parties_filters:
@@ -2562,18 +2614,18 @@ def build_full_join_es_queries(
case [], []:
pass
case [], _:
- join_query = Q(
+ child_docs_query = Q(
"bool",
should=child_text_query,
minimum_should_match=1,
)
case _, []:
- join_query = Q(
+ child_docs_query = Q(
"bool",
filter=child_filters,
)
case _, _:
- join_query = Q(
+ child_docs_query = Q(
"bool",
filter=child_filters,
should=child_text_query,
@@ -2589,13 +2641,14 @@ def build_full_join_es_queries(
(child_highlighting, cd["type"]), {}
)
has_child_query = build_has_child_query(
- join_query,
+ child_docs_query,
child_type,
query_hits_limit,
hl_fields,
get_function_score_sorting_key(cd, api_version),
child_highlighting=child_highlighting,
default_current_date=cd.get("request_date"),
+ alerts=alerts,
)
if parties_filters and not has_child_query:
@@ -2610,6 +2663,7 @@ def build_full_join_es_queries(
SEARCH_RECAP_CHILD_HL_FIELDS,
get_function_score_sorting_key(cd, api_version),
default_current_date=cd.get("request_date"),
+ alerts=alerts,
)
if has_child_query:
@@ -2666,9 +2720,9 @@ def build_full_join_es_queries(
q_should.append(parent_query)
if not q_should:
- return [], join_query
+ return [], child_docs_query, parent_query
- final_query = apply_custom_score_to_main_query(
+ main_join_query = apply_custom_score_to_main_query(
cd,
Q(
"bool",
@@ -2676,10 +2730,7 @@ def build_full_join_es_queries(
),
api_version,
)
- return (
- final_query,
- join_query,
- )
+ return (main_join_query, child_docs_query, parent_query)
def limit_inner_hits(
@@ -2916,12 +2967,12 @@ def do_es_api_query(
child documents.
"""
- child_docs_query = None
-
try:
- s, join_query = build_es_base_query(
+ es_queries = build_es_base_query(
search_query, cd, cd["highlight"], api_version
)
+ s = es_queries.search_query
+ child_docs_query = es_queries.child_query
except (
UnbalancedParenthesesQuery,
UnbalancedQuotesQuery,
@@ -2938,7 +2989,7 @@ def do_es_api_query(
# Note that in V3 Case Law Search, opinions are collapsed by cluster_id
# meaning that only one result per cluster is shown.
s = build_child_docs_query(
- join_query,
+ child_docs_query,
cd=cd,
)
main_query = search_query.query(s)
@@ -2974,7 +3025,7 @@ def do_es_api_query(
)
else:
child_docs_query = build_child_docs_query(
- join_query,
+ child_docs_query,
cd=cd,
)
# Build query params for the ES V4 Search API endpoints.
@@ -3076,7 +3127,7 @@ def do_es_alert_estimation_query(
"""
match cd["type"]:
- case SEARCH_TYPES.OPINION:
+ case SEARCH_TYPES.OPINION | SEARCH_TYPES.RECAP:
after_field = "filed_after"
before_field = "filed_before"
case SEARCH_TYPES.ORAL_ARGUMENT:
@@ -3089,11 +3140,123 @@ def do_es_alert_estimation_query(
days=int(day_count)
)
cd[before_field] = None
- estimation_query, _ = build_es_base_query(search_query, cd)
+ es_queries = build_es_base_query(search_query, cd)
+ estimation_query = es_queries.search_query
+ if cd["type"] == SEARCH_TYPES.RECAP:
+ # The RECAP estimation query consists of two requests: one to estimate
+ # Docket hits and one to estimate RECAPDocument hits.
+ del cd[after_field]
+ del cd[before_field]
+ cd["entry_date_filed_after"] = (
+ datetime.date.today() - datetime.timedelta(days=int(day_count))
+ )
+ cd["entry_date_filed_before"] = None
+
+ main_doc_count_query = clean_count_query(estimation_query)
+ main_doc_count_query = main_doc_count_query.extra(
+ size=0, track_total_hits=True
+ )
+
+ # Perform the two queries in a single request.
+ multi_search = MultiSearch()
+ multi_search = multi_search.add(main_doc_count_query)
+
+ # Build RECAPDocuments count query.
+ es_queries = build_es_base_query(search_query, cd)
+ child_docs_query = es_queries.child_query
+ child_docs_count_query = build_child_docs_query(child_docs_query, cd)
+ child_total = 0
+ if child_docs_count_query:
+ child_docs_count_query = search_query.query(child_docs_count_query)
+ child_total_query = child_docs_count_query.extra(
+ size=0, track_total_hits=True
+ )
+ multi_search = multi_search.add(child_total_query)
+
+ responses = multi_search.execute()
+ parent_total = responses[0].hits.total.value
+ if child_docs_count_query:
+ child_doc_count_response = responses[1]
+ child_total = child_doc_count_response.hits.total.value
+ total_recap_estimation = parent_total + child_total
+ return total_recap_estimation
return estimation_query.count()
+def do_es_sweep_alert_query(
+ search_query: Search,
+ child_search_query: Search,
+ cd: CleanData,
+) -> tuple[list[Hit] | None, Response | None, Response | None]:
+ """Build an ES query for its use in the daily RECAP sweep index.
+
+ :param search_query: Elasticsearch DSL Search object.
+ :param child_search_query: The Elasticsearch DSL search query to perform
+ the child-only query.
+ :param cd: The query CleanedData
+ :return: A three-tuple: the main query results, the parent-only
+ (Docket) results or None, and the child-only (RECAPDocument) results
+ or None.
+ """
+
+ search_form = SearchForm(cd, is_es_form=True)
+ if search_form.is_valid():
+ cd = search_form.cleaned_data
+ else:
+ return None, None, None
+ es_queries = build_es_base_query(search_query, cd, True, alerts=True)
+ s = es_queries.search_query
+ parent_query = es_queries.parent_query
+ child_query = es_queries.child_query
+ main_query = add_es_highlighting(s, cd, alerts=True)
+ main_query = main_query.sort(build_sort_results(cd))
+ main_query = main_query.extra(
+ from_=0, size=settings.SCHEDULED_ALERT_HITS_LIMIT
+ )
+
+ multi_search = MultiSearch()
+ multi_search = multi_search.add(main_query)
+ if parent_query:
+ parent_search = search_query.query(parent_query)
+ parent_search = parent_search.extra(
+ from_=0, size=settings.SCHEDULED_ALERT_HITS_LIMIT
+ )
+ parent_search = parent_search.source(includes=["docket_id"])
+ multi_search = multi_search.add(parent_search)
+
+ if child_query:
+ child_search = child_search_query.query(child_query)
+ child_search = child_search.extra(
+ from_=0,
+ size=settings.SCHEDULED_ALERT_HITS_LIMIT
+ * settings.RECAP_CHILD_HITS_PER_RESULT,
+ )
+ child_search = child_search.source(includes=["id"])
+ multi_search = multi_search.add(child_search)
+
+ responses = multi_search.execute()
+ main_results = responses[0]
+ rd_results = None
+ docket_results = None
+ if parent_query:
+ docket_results = responses[1]
+ if child_query:
+ rd_results = responses[2]
+
+ limit_inner_hits({}, main_results, cd["type"])
+ set_results_highlights(main_results, cd["type"])
+
+ for result in main_results:
+ child_result_objects = []
+ if hasattr(result, "child_docs"):
+ for child_doc in result.child_docs:
+ child_result_objects.append(child_doc.to_dict())
+ result["child_docs"] = child_result_objects
+
+ return main_results, docket_results, rd_results
+
+
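+# Example (illustrative sketch): running the sweep query for one alert, as
+# the cl_send_recap_alerts command would; parsing the stored alert query
+# into a QueryDict is an assumption here.
+#
+#   qd = QueryDict(alert.query.encode(), mutable=True)
+#   main, dockets, rds = do_es_sweep_alert_query(
+#       RECAPSweepDocument.search(),
+#       ESRECAPSweepDocument.search(),
+#       qd,
+#   )
+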
def compute_lowest_possible_estimate(precision_threshold: int) -> int:
"""Estimates can be below reality by as much as 6%. Round numbers below that threshold.
:return: The lowest possible estimate.
diff --git a/cl/lib/types.py b/cl/lib/types.py
index 82d6131b5e..ff257574e9 100644
--- a/cl/lib/types.py
+++ b/cl/lib/types.py
@@ -4,6 +4,8 @@
from typing import Any, Callable, Dict, List, NotRequired, TypedDict, Union
from django.http import HttpRequest
+from django_elasticsearch_dsl.search import Search
+from elasticsearch_dsl.query import QueryString
from cl.users.models import User
@@ -190,6 +192,13 @@ def get_db_to_dataclass_map(self):
return self.__db_to_dataclass_map
+@dataclass
+class EsMainQueries:
+ search_query: Search
+ parent_query: QueryString | None = None
+ child_query: QueryString | None = None
+
+
@dataclass
class ApiPositionMapping(BasePositionMapping):
position_type_dict: defaultdict[int, list[str]] = field(
diff --git a/cl/recap/mergers.py b/cl/recap/mergers.py
index 6eccba198e..80244e7865 100644
--- a/cl/recap/mergers.py
+++ b/cl/recap/mergers.py
@@ -1792,6 +1792,15 @@ async def merge_attachment_page_data(
if not all(sanity_checks):
continue
+ # The page count is missing on some restricted docs (see Juriscraper).
+ # Attachment 0 may not have a page count since it is the main RD.
+ if (
+ "page_count" in attachment
+ and attachment["page_count"] is None
+ and attachment["attachment_number"] != 0
+ ):
+ continue
+
# Appellate entries with attachments don't have a main RD, transform it
# to an attachment. In ACMS attachment pages, all the documents use the
# same pacer_doc_id, so we need to make sure only one is matched to the
diff --git a/cl/search/constants.py b/cl/search/constants.py
index a62617f031..333dfbca6c 100644
--- a/cl/search/constants.py
+++ b/cl/search/constants.py
@@ -160,7 +160,6 @@
"referredTo": 0,
"suitNature": 0,
}
-
SEARCH_OPINION_HL_FIELDS = {
"caseName": 0,
"citation": 0,
@@ -307,6 +306,7 @@
Opinion.TRIAL_COURT: "trial-court-document",
}
+
cardinality_query_unique_ids = {
SEARCH_TYPES.RECAP: "docket_id",
SEARCH_TYPES.DOCKETS: "docket_id",
diff --git a/cl/search/documents.py b/cl/search/documents.py
index e059b58f22..59d8327875 100644
--- a/cl/search/documents.py
+++ b/cl/search/documents.py
@@ -1,8 +1,10 @@
from datetime import datetime
+from django.conf import settings
from django.http import QueryDict
from django.utils.html import escape, strip_tags
from django_elasticsearch_dsl import Document, fields
+from elasticsearch_dsl import Document as DSLDocument
from cl.alerts.models import Alert
from cl.audio.models import Audio
@@ -363,8 +365,8 @@ def prepare_percolator_query(self, instance):
cd = search_form.cleaned_data
search_query = AudioDocument.search()
- query, _ = build_es_base_query(search_query, cd)
- return query.to_dict()["query"]
+ es_queries = build_es_base_query(search_query, cd)
+ return es_queries.search_query.to_dict()["query"]
class ES_CHILD_ID:
@@ -960,8 +962,7 @@ def prepare_timestamp(self, instance):
return datetime.utcnow()
-@recap_index.document
-class ESRECAPDocument(DocketBaseDocument):
+class ESRECAPBaseDocument(DSLDocument):
id = fields.IntegerField(attr="pk")
docket_entry_id = fields.IntegerField(attr="docket_entry.pk")
description = fields.TextField(
@@ -1029,6 +1030,10 @@ class ESRECAPDocument(DocketBaseDocument):
fields.IntegerField(multi=True),
)
+
+@recap_index.document
+class ESRECAPDocument(DocketBaseDocument, ESRECAPBaseDocument):
+
class Django:
model = RECAPDocument
ignore_signals = True
@@ -1834,3 +1839,24 @@ def prepare_non_participating_judge_ids(self, instance):
def prepare_cluster_child(self, instance):
return "opinion_cluster"
+
+
+class RECAPSweepDocument(DocketDocument, ESRECAPDocument):
+ class Index:
+ name = "recap_sweep"
+ settings = {
+ "number_of_shards": settings.ELASTICSEARCH_RECAP_NUMBER_OF_SHARDS,
+ "number_of_replicas": settings.ELASTICSEARCH_RECAP_NUMBER_OF_REPLICAS,
+ "analysis": settings.ELASTICSEARCH_DSL["analysis"],
+ }
+
+
+class ESRECAPSweepDocument(ESRECAPBaseDocument):
+
+ class Index:
+ name = "recap_document_sweep"
+ settings = {
+ "number_of_shards": settings.ELASTICSEARCH_RECAP_NUMBER_OF_SHARDS,
+ "number_of_replicas": settings.ELASTICSEARCH_RECAP_NUMBER_OF_REPLICAS,
+ "analysis": settings.ELASTICSEARCH_DSL["analysis"],
+ }
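These two sweep documents define stand-alone Elasticsearch indices ("recap_sweep" and "recap_document_sweep") that reuse the RECAP shard, replica, and analysis settings. A minimal sketch of how such indices are typically initialized with elasticsearch-dsl before a sweep re-indexes into them (Document.init() is standard elasticsearch-dsl; whether the sweep command calls it exactly this way is an assumption):

    from cl.search.documents import ESRECAPSweepDocument, RECAPSweepDocument

    # init() creates the index with its mapping and settings from the
    # Index metadata (or updates the mapping if the index already exists).
    RECAPSweepDocument.init()
    ESRECAPSweepDocument.init()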
diff --git a/cl/search/forms.py b/cl/search/forms.py
index 7ff40a7f3e..7dcd91eaeb 100644
--- a/cl/search/forms.py
+++ b/cl/search/forms.py
@@ -221,6 +221,16 @@ class SearchForm(forms.Form):
),
)
atty_name.as_str_types = [SEARCH_TYPES.RECAP]
+ firm_name = forms.CharField(
+ required=False,
+ label="Firm Name",
+ widget=forms.TextInput(
+ attrs={
+ "class": "external-input form-control",
+ "autocomplete": "off",
+ },
+ ),
+ )
#
# Oral argument fields
diff --git a/cl/search/management/commands/cl_index_parent_and_child_docs.py b/cl/search/management/commands/cl_index_parent_and_child_docs.py
index abb528a3b0..366d9fe13e 100644
--- a/cl/search/management/commands/cl_index_parent_and_child_docs.py
+++ b/cl/search/management/commands/cl_index_parent_and_child_docs.py
@@ -526,7 +526,9 @@ def process_queryset(
match task_to_use:
case "index_parent_and_child_docs":
index_parent_and_child_docs.si(
- chunk, search_type, testing_mode=testing_mode
+ chunk,
+ search_type,
+ testing_mode=testing_mode,
).set(queue=queue).apply_async()
case "index_parent_or_child_docs":
diff --git a/cl/search/tasks.py b/cl/search/tasks.py
index c5daeb5073..fa6bf4fdaf 100644
--- a/cl/search/tasks.py
+++ b/cl/search/tasks.py
@@ -477,6 +477,13 @@ def document_fields_to_update(
continue
field_value = prepare_method(main_instance)
fields_to_update[field] = field_value
+
+ if fields_to_update:
+ # If any fields are being updated, refresh the timestamp field as well.
+ prepare_timestamp = getattr(es_document(), "prepare_timestamp", None)
+ if prepare_timestamp:
+ field_value = prepare_timestamp(main_instance)
+ fields_to_update["timestamp"] = field_value
return fields_to_update
@@ -777,9 +784,18 @@ def update_children_docs_by_query(
# Build the UpdateByQuery script and execute it
script_lines = []
params = {}
+ if fields_to_update:
+ # If there are fields to update, include the timestamp field too.
+ fields_to_update.append("timestamp")
for field_to_update in fields_to_update:
field_list = (
- fields_map[field_to_update] if fields_map else [field_to_update]
+ ["timestamp"]
+ if field_to_update == "timestamp"
+ else (
+ fields_map[field_to_update]
+ if fields_map
+ else [field_to_update]
+ )
)
for field_name in field_list:
script_lines.append(
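Both hunks in this file propagate a fresh timestamp whenever any other field changes: document_fields_to_update() adds it to the partial-update dict, and update_children_docs_by_query() appends it to the UpdateByQuery script so child documents record the same update time. A sketch of the painless script this loop plausibly builds (the appended script body is truncated above, so the assignment shown is an assumption based on the usual ctx._source pattern):

    script_lines = []
    for field_name in ["description", "timestamp"]:
        # One painless assignment per field, reading its value from params.
        script_lines.append(f"ctx._source.{field_name} = params.{field_name};")
    script_source = " ".join(script_lines)
    # script_source ==
    #   "ctx._source.description = params.description; "
    #   "ctx._source.timestamp = params.timestamp;"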
diff --git a/cl/search/templates/feeds/solr_desc_template.html b/cl/search/templates/feeds/solr_desc_template.html
index ce02928003..479d9fe4f5 100644
--- a/cl/search/templates/feeds/solr_desc_template.html
+++ b/cl/search/templates/feeds/solr_desc_template.html
@@ -7,7 +7,7 @@
{% else %}
{% flag "o-es-active" %}
{% if doc0.child_docs %}
- {% with doc=doc0.child_docs.0|get_attrdict:"_source" %}
+ {% with doc=doc0.child_docs.0|get_es_doc_content %}
{{ doc.text|render_string_or_list|safe|truncatewords:"500" }}
{% endwith %}
{% else %}
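This change, together with the matching one in pa_search_result.html below, swaps the generic get_attrdict:"_source" lookup for a dedicated get_es_doc_content template filter. A hedged sketch of what such a filter might look like (only the filter name comes from this diff; the body is an assumption):

    from django import template

    register = template.Library()

    @register.filter
    def get_es_doc_content(hit):
        # Return the document body of an ES hit, tolerating both raw hit
        # dicts (with a "_source" key) and already-unwrapped documents.
        try:
            return hit["_source"]
        except (KeyError, TypeError):
            return hit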
diff --git a/cl/search/templates/includes/pa_search_result.html b/cl/search/templates/includes/pa_search_result.html
index feecdd1be2..f290514af8 100644
--- a/cl/search/templates/includes/pa_search_result.html
+++ b/cl/search/templates/includes/pa_search_result.html
@@ -4,7 +4,7 @@
{% load humanize %}
{% for result in results.object_list %}
- {% with opinion=result.grouped_by_opinion_cluster_id.hits.hits.0|get_attrdict:"_source" %}
+ {% with opinion=result.grouped_by_opinion_cluster_id.hits.hits.0|get_es_doc_content %}