From 3e4f269c6230b8d5954cd922e32090e5866caf76 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Thu, 20 Jun 2024 19:22:43 -0600 Subject: [PATCH 01/33] fix(elasticsearch): Test RECAP nested index reliability --- cl/lib/elasticsearch_utils.py | 286 ++++++++++++- cl/search/api_serializers.py | 15 + cl/search/constants.py | 5 + cl/search/documents.py | 404 ++++++++++++++++++ cl/search/es_indices.py | 9 + .../cl_index_parent_and_child_docs.py | 7 + cl/search/tasks.py | 6 +- cl/search/tests/tests_es_recap.py | 69 ++- 8 files changed, 785 insertions(+), 16 deletions(-) diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 24d49257f7..7f407441aa 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -4,6 +4,7 @@ import re import time import traceback +from collections import defaultdict from copy import deepcopy from dataclasses import fields from functools import reduce, wraps @@ -68,6 +69,7 @@ SEARCH_RECAP_CHILD_HL_FIELDS, SEARCH_RECAP_CHILD_QUERY_FIELDS, SEARCH_RECAP_HL_FIELDS, + SEARCH_RECAP_NESTED_CHILD_QUERY_FIELDS, SEARCH_RECAP_PARENT_QUERY_FIELDS, api_child_highlight_map, ) @@ -1066,6 +1068,7 @@ def build_es_base_query( cd: CleanData, child_highlighting: bool = True, api_version: Literal["v3", "v4"] | None = None, + nested_query: bool = False, ) -> tuple[Search, QueryString | None]: """Builds filters and fulltext_query based on the given cleaned data and returns an elasticsearch query. @@ -1074,6 +1077,7 @@ def build_es_base_query( :param cd: The cleaned data object containing the query and filters. :param child_highlighting: Whether highlighting should be enabled in child docs. :param api_version: Optional, the request API version. + :param nested_query: Whether to perform a nested query. :return: A two-tuple, the Elasticsearch search query object and an ES QueryString for child documents, or None if there is no need to query child documents. @@ -1151,6 +1155,15 @@ def build_es_base_query( ], ) ) + nested_child_fields = SEARCH_RECAP_NESTED_CHILD_QUERY_FIELDS.copy() + nested_child_fields.extend( + add_fields_boosting( + cd, + [ + "description", + ], + ) + ) child_query_fields = {"recap_document": child_fields} parent_query_fields = SEARCH_RECAP_PARENT_QUERY_FIELDS.copy() parent_query_fields.extend( @@ -1162,13 +1175,22 @@ def build_es_base_query( ], ) ) - main_query, join_query = build_full_join_es_queries( - cd, - child_query_fields, - parent_query_fields, - child_highlighting=child_highlighting, - api_version=api_version, - ) + + if nested_query: + main_query, _ = build_full_nested_es_queries( + cd, + nested_child_fields, + parent_query_fields, + ) + else: + main_query, join_query = build_full_join_es_queries( + cd, + child_query_fields, + parent_query_fields, + child_highlighting=child_highlighting, + api_version=api_version, + ) + case SEARCH_TYPES.OPINION: str_query = cd.get("q", "") related_match = RELATED_PATTERN.search(str_query) @@ -1984,11 +2006,14 @@ def fetch_es_results( return [], 0, error, None, None -def build_has_child_filters(cd: CleanData) -> list[QueryString]: +def build_has_child_filters( + cd: CleanData, nested_query=False +) -> list[QueryString]: """Builds Elasticsearch 'has_child' filters based on the given child type and CleanData. :param cd: The user input CleanedData. + :param nested_query: Whether to perform a nested query. :return: A list of QueryString objects containing the 'has_child' filters. 
""" @@ -2022,22 +2047,36 @@ def build_has_child_filters(cd: CleanData) -> list[QueryString]: attachment_number = cd.get("attachment_number", "") if available_only: + field = ( + "is_available" + if not nested_query + else "documents.is_available" + ) queries_list.extend( build_term_query( - "is_available", + field, available_only, ) ) if description: - queries_list.extend(build_text_filter("description", description)) + field = ( + "description" if not nested_query else "documents.description" + ) + queries_list.extend(build_text_filter(field, description)) if document_number: - queries_list.extend( - build_term_query("document_number", document_number) + field = ( + "document_number" + if not nested_query + else "documents.document_number" ) + queries_list.extend(build_term_query(field, document_number)) if attachment_number: - queries_list.extend( - build_term_query("attachment_number", attachment_number) + field = ( + "attachment_number" + if not nested_query + else "documents.attachment_number" ) + queries_list.extend(build_term_query(field, attachment_number)) return queries_list @@ -3014,3 +3053,222 @@ def do_es_alert_estimation_query( estimation_query, _ = build_es_base_query(search_query, cd) return estimation_query.count() + + +def build_nested_child_query( + query: QueryString | str, + child_type: str, + child_hits_limit: int, + highlighting_fields: dict[str, int] | None = None, +) -> QueryString: + """Build a nested query. + + :param query: The Elasticsearch query string or QueryString object. + :param child_type: The type of the child document. + :param child_hits_limit: The maximum number of child hits to be returned. + :param highlighting_fields: List of fields to highlight in child docs. + :return: The 'has_child' query. + """ + + highlight_options, fields_to_exclude = build_highlights_dict( + highlighting_fields, SEARCH_HL_TAG + ) + inner_hits = { + "name": f"filter_query_inner_{child_type}", + "size": child_hits_limit, + "_source": { + "excludes": fields_to_exclude, + }, + } + if highlight_options: + inner_hits["highlight"] = highlight_options + + return Q( + "nested", + path="documents", + score_mode="max", + query=query, + inner_hits=inner_hits, + ) + + +def build_full_nested_es_queries( + cd: CleanData, + child_query_fields: list[str], + parent_query_fields: list[str], +) -> tuple[QueryString | list, QueryString | None]: + """Build a complete Elasticsearch query with both parent and nested + documents conditions. + + :param cd: The query CleanedData + :param child_query_fields: A dictionary mapping child fields document type. + :param parent_query_fields: A list of fields for the parent document. + :return: An Elasticsearch QueryString object. + """ + + q_should = [] + child_query = None + if cd["type"] in [ + SEARCH_TYPES.RECAP, + SEARCH_TYPES.DOCKETS, + SEARCH_TYPES.RECAP_DOCUMENT, + SEARCH_TYPES.OPINION, + SEARCH_TYPES.PEOPLE, + ]: + # Build child filters. + child_filters = build_has_child_filters(cd, nested_query=True) + # Copy the original child_filters before appending parent fields. + # For its use later in the parent filters. + child_filters_original = deepcopy(child_filters) + # Build child text query. + child_fields = [f"documents.{field}" for field in child_query_fields] + child_text_query = build_fulltext_query( + child_fields, cd.get("q", ""), only_queries=True + ) + + # Build parent filters. 
+        parent_filters = build_join_es_filters(cd)
+
+        # Build the child query based on child_filters and child_text_query.
+        match child_filters, child_text_query:
+            case [], []:
+                pass
+            case [], _:
+                child_query = Q(
+                    "bool",
+                    should=child_text_query,
+                    minimum_should_match=1,
+                )
+            case _, []:
+                child_query = Q(
+                    "bool",
+                    filter=child_filters,
+                )
+            case _, _:
+                child_query = Q(
+                    "bool",
+                    filter=child_filters,
+                    should=child_text_query,
+                    minimum_should_match=1,
+                )
+
+        _, query_hits_limit = get_child_top_hits_limit(cd, cd["type"])
+        has_child_query = None
+        if child_text_query or child_filters:
+            hl_fields = api_child_highlight_map.get((True, cd["type"]), {})
+            has_child_query = build_nested_child_query(
+                child_query,
+                "recap_document",
+                query_hits_limit,
+                hl_fields,
+            )
+
+        if has_child_query:
+            q_should.append(has_child_query)
+
+        # Build the parent filter and text queries.
+        string_query = build_fulltext_query(
+            parent_query_fields, cd.get("q", ""), only_queries=True
+        )
+
+        # If child filters are set, add a nested query as a filter to the
+        # parent query to exclude results without matching children.
+        if child_filters_original:
+            parent_filters.append(
+                Q(
+                    "nested",
+                    path="documents",
+                    score_mode="max",
+                    query=Q("bool", filter=child_filters_original),
+                )
+            )
+        parent_query = None
+        match parent_filters, string_query:
+            case [], []:
+                pass
+            case [], _:
+                parent_query = Q(
+                    "bool",
+                    should=string_query,
+                    minimum_should_match=1,
+                )
+            case _, []:
+                parent_query = Q(
+                    "bool",
+                    filter=parent_filters,
+                )
+            case _, _:
+                parent_query = Q(
+                    "bool",
+                    filter=parent_filters,
+                    should=string_query,
+                    minimum_should_match=1,
+                )
+        if parent_query:
+            q_should.append(parent_query)
+
+    if not q_should:
+        return [], child_query
+
+    final_query = Q(
+        "bool",
+        should=q_should,
+    )
+    return (
+        final_query,
+        child_query,
+    )
+
+
+def do_es_sweep_nested_query(
+    search_query: Search,
+    cd: CleanData,
+) -> tuple[list[defaultdict] | None, int | None]:
+    """Build and execute an ES query for the daily RECAP sweep index.
+
+    :param search_query: Elasticsearch DSL Search object.
+    :param cd: The query CleanedData.
+    :return: A two-tuple: the ES results and the total number of hits, or
+    (None, None) if the search form did not validate.
+ """ + + search_form = SearchForm(cd, is_es_form=True) + if search_form.is_valid(): + cd = search_form.cleaned_data + else: + return None, None + + hits = None + try: + s, _ = build_es_base_query( + search_query, + cd, + True, + nested_query=True, + ) + except ( + UnbalancedParenthesesQuery, + UnbalancedQuotesQuery, + BadProximityQuery, + ) as e: + raise ElasticBadRequestError(detail=e.message) + main_query = add_es_highlighting(s, cd, highlighting=True) + main_query = main_query.extra(from_=0, size=30) + results = main_query.execute() + if results: + hits = results.hits.total.value + + limit_inner_hits({}, results, cd["type"]) + set_results_highlights(results, cd["type"]) + + for result in results: + child_result_objects = [] + if hasattr(result, "child_docs"): + for child_doc in result.child_docs: + child_result_objects.append( + defaultdict(lambda: None, child_doc["_source"].to_dict()) + ) + result["child_docs"] = child_result_objects + + return results, hits diff --git a/cl/search/api_serializers.py b/cl/search/api_serializers.py index 1f9cbb7d75..7c72dd5a2d 100644 --- a/cl/search/api_serializers.py +++ b/cl/search/api_serializers.py @@ -731,3 +731,18 @@ class Meta: "pacer_doc_id", "trustee_str", ) + + +class RECAPNestedResultSerializer( + RECAPMetaMixin, BaseDocketESResultSerializer +): + """The serializer class for RECAP search type results.""" + + recap_documents = BaseRECAPDocumentESResultSerializer( + many=True, read_only=True, source="child_docs" + ) + + class Meta(BaseDocketESResultSerializer.Meta): + exclude = BaseDocketESResultSerializer.Meta.exclude + ( + "docket_absolute_url", + ) diff --git a/cl/search/constants.py b/cl/search/constants.py index 55012fa9f0..91b5cb93f0 100644 --- a/cl/search/constants.py +++ b/cl/search/constants.py @@ -96,6 +96,11 @@ "chapter", "trustee_str", ] +SEARCH_RECAP_NESTED_CHILD_QUERY_FIELDS = [ + "short_description", + "plain_text", + "document_type", +] SEARCH_OPINION_QUERY_FIELDS = [ "court", "court_id", diff --git a/cl/search/documents.py b/cl/search/documents.py index d7b18f9472..5fada32689 100644 --- a/cl/search/documents.py +++ b/cl/search/documents.py @@ -29,12 +29,14 @@ parenthetical_group_index, people_db_index, recap_index, + recap_sweep_index, ) from cl.search.forms import SearchForm from cl.search.models import ( BankruptcyInformation, Citation, Docket, + DocketEntry, Opinion, OpinionCluster, ParentheticalGroup, @@ -1826,3 +1828,405 @@ def prepare_non_participating_judge_ids(self, instance): def prepare_cluster_child(self, instance): return "opinion_cluster" + + +@recap_sweep_index.document +class RECAPNestedDocument(Document): + # Docket Fields + docket_id = fields.IntegerField(attr="pk") + caseName = fields.TextField( + analyzer="text_en_splitting_cl", + term_vector="with_positions_offsets", + fields={ + "exact": fields.TextField( + analyzer="english_exact", + search_analyzer="search_analyzer_exact", + term_vector="with_positions_offsets", + ), + }, + search_analyzer="search_analyzer", + ) + case_name_full = fields.TextField( + attr="case_name_full", + analyzer="text_en_splitting_cl", + fields={ + "exact": fields.TextField( + attr="case_name_full", + analyzer="english_exact", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + ) + docketNumber = fields.TextField( + attr="docket_number", + analyzer="text_en_splitting_cl", + term_vector="with_positions_offsets", + fields={ + "exact": fields.TextField( + attr="docket_number", + analyzer="english_exact", + term_vector="with_positions_offsets", + 
search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + ) + suitNature = fields.TextField( + attr="nature_of_suit", + analyzer="text_en_splitting_cl", + term_vector="with_positions_offsets", + fields={ + "exact": fields.TextField( + attr="nature_of_suit", + analyzer="english_exact", + term_vector="with_positions_offsets", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + ) + cause = fields.TextField( + attr="cause", + analyzer="text_en_splitting_cl", + term_vector="with_positions_offsets", + fields={ + "exact": fields.TextField( + attr="cause", + analyzer="english_exact", + term_vector="with_positions_offsets", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + ) + juryDemand = fields.TextField( + attr="jury_demand", + analyzer="text_en_splitting_cl", + term_vector="with_positions_offsets", + fields={ + "exact": fields.TextField( + attr="jury_demand", + analyzer="english_exact", + term_vector="with_positions_offsets", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + ) + jurisdictionType = fields.TextField( + attr="jurisdiction_type", + analyzer="text_en_splitting_cl", + fields={ + "exact": fields.TextField( + attr="jurisdiction_type", + analyzer="english_exact", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + ) + dateArgued = fields.DateField(attr="date_argued") + dateFiled = fields.DateField(attr="date_filed") + dateTerminated = fields.DateField(attr="date_terminated") + assignedTo = fields.TextField( + analyzer="text_en_splitting_cl", + term_vector="with_positions_offsets", + fields={ + "exact": fields.TextField( + analyzer="english_exact", + term_vector="with_positions_offsets", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + ) + assigned_to_id = fields.KeywordField(attr="assigned_to.pk") + referredTo = fields.TextField( + analyzer="text_en_splitting_cl", + term_vector="with_positions_offsets", + fields={ + "exact": fields.TextField( + analyzer="english_exact", + term_vector="with_positions_offsets", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + ) + referred_to_id = fields.KeywordField(attr="referred_to.pk") + court = fields.TextField( + attr="court.full_name", + analyzer="text_en_splitting_cl", + fields={ + "exact": fields.TextField( + attr="court.full_name", + analyzer="english_exact", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + ) + court_id = fields.TextField( + attr="court.pk", + analyzer="text_en_splitting_cl", + fields={"raw": fields.KeywordField(attr="court.pk")}, + search_analyzer="search_analyzer", + ) + court_citation_string = fields.TextField( + attr="court.citation_string", + analyzer="text_en_splitting_cl", + search_analyzer="search_analyzer", + term_vector="with_positions_offsets", + ) + chapter = fields.TextField( + analyzer="text_en_splitting_cl", + search_analyzer="search_analyzer", + ) + trustee_str = fields.TextField( + analyzer="text_en_splitting_cl", + fields={ + "exact": fields.TextField( + analyzer="english_exact", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + ) + date_created = fields.DateField(attr="date_created") + pacer_case_id = fields.KeywordField(attr="pacer_case_id") + + # Parties + party_id = fields.ListField(fields.IntegerField(multi=True)) + party = fields.ListField( + 
fields.TextField( + analyzer="text_en_splitting_cl", + fields={ + "exact": fields.TextField( + analyzer="english_exact", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + multi=True, + ) + ) + attorney_id = fields.ListField(fields.IntegerField(multi=True)) + attorney = fields.ListField( + fields.TextField( + analyzer="text_en_splitting_cl", + fields={ + "exact": fields.TextField( + analyzer="english_exact", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + multi=True, + ) + ) + firm_id = fields.ListField(fields.IntegerField(multi=True)) + firm = fields.ListField( + fields.TextField( + analyzer="text_en_splitting_cl", + fields={ + "exact": fields.TextField( + analyzer="english_exact", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + multi=True, + ) + ) + + # RECAPDocument fields: + documents = fields.NestedField( + properties={ + "id": fields.IntegerField(attr="pk"), + "docket_entry_id": fields.IntegerField(attr="docket_entry.pk"), + "description": fields.TextField( + attr="docket_entry.description", + analyzer="text_en_splitting_cl", + term_vector="with_positions_offsets", + fields={ + "exact": fields.TextField( + attr="docket_entry.description", + term_vector="with_positions_offsets", + analyzer="english_exact", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + ), + "entry_number": fields.LongField(attr="docket_entry.entry_number"), + "entry_date_filed": fields.DateField( + attr="docket_entry.date_filed" + ), + "short_description": fields.TextField( + attr="description", + analyzer="text_en_splitting_cl", + term_vector="with_positions_offsets", + fields={ + "exact": fields.TextField( + attr="description", + analyzer="english_exact", + term_vector="with_positions_offsets", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + ), + "document_type": fields.TextField( + analyzer="text_en_splitting_cl", + term_vector="with_positions_offsets", + fields={ + "exact": fields.TextField( + analyzer="english_exact", + term_vector="with_positions_offsets", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + ), + "document_number": fields.LongField(), + "pacer_doc_id": fields.KeywordField(attr="pacer_doc_id"), + "plain_text": fields.TextField( + analyzer="text_en_splitting_cl", + term_vector="with_positions_offsets", + fields={ + "exact": fields.TextField( + analyzer="english_exact", + term_vector="with_positions_offsets", + search_analyzer="search_analyzer_exact", + ), + }, + search_analyzer="search_analyzer", + ), + "attachment_number": fields.IntegerField(attr="attachment_number"), + "is_available": fields.BooleanField(attr="is_available"), + "page_count": fields.IntegerField(attr="page_count"), + "filepath_local": fields.KeywordField(index=False), + "absolute_url": fields.KeywordField(index=False), + "cites": fields.ListField( + fields.IntegerField(multi=True), + ), + } + ) + + # Meta + timestamp = fields.DateField() + + class Django: + model = Docket + ignore_signals = True + + def prepare_caseName(self, instance): + return best_case_name(instance) + + def prepare_assignedTo(self, instance): + if instance.assigned_to: + return instance.assigned_to.name_full + elif instance.assigned_to_str: + return instance.assigned_to_str + + def prepare_referredTo(self, instance): + if instance.referred_to: + return instance.referred_to.name_full + elif 
instance.referred_to_str: + return instance.referred_to_str + + def prepare_chapter(self, instance): + if BankruptcyInformation.objects.filter(docket=instance).exists(): + return instance.bankruptcy_information.chapter + + def prepare_trustee_str(self, instance): + if BankruptcyInformation.objects.filter(docket=instance).exists(): + return instance.bankruptcy_information.trustee_str + + def prepare_docket_child(self, instance): + return "docket" + + def prepare_docket_absolute_url(self, instance): + return instance.get_absolute_url() + + def prepare_parties(self, instance): + out = { + "party_id": set(), + "party": set(), + "attorney_id": set(), + "attorney": set(), + "firm_id": set(), + "firm": set(), + } + + # Extract only required parties values. + party_values = instance.parties.values_list("pk", "name") + for pk, name in party_values.iterator(): + out["party_id"].add(pk) + out["party"].add(name) + + # Extract only required attorney values. + atty_values = ( + Attorney.objects.filter(roles__docket=instance) + .distinct() + .values_list("pk", "name") + ) + for pk, name in atty_values.iterator(): + out["attorney_id"].add(pk) + out["attorney"].add(name) + + # Extract only required firm values. + firms_values = ( + AttorneyOrganization.objects.filter( + attorney_organization_associations__docket=instance + ) + .distinct() + .values_list("pk", "name") + ) + for pk, name in firms_values.iterator(): + out["firm_id"].add(pk) + out["firm"].add(name) + + return out + + def prepare_documents(self, instance): + rds = RECAPDocument.objects.filter(docket_entry__docket=instance) + return [ + { + "id": rd.pk, + "docket_entry_id": rd.docket_entry_id, + "description": rd.docket_entry.description, + "entry_number": rd.docket_entry.entry_number, + "entry_date_filed": rd.docket_entry.date_filed, + "short_description": rd.description, + "document_type": rd.get_document_type_display(), + "document_number": rd.document_number or None, + "pacer_doc_id": rd.pacer_doc_id, + "plain_text": escape(rd.plain_text.translate(null_map)), + "attachment_number": rd.attachment_number, + "is_available": rd.is_available, + "page_count": rd.page_count, + "filepath_local": ( + rd.filepath_local.name if rd.filepath_local else None + ), + "absolute_url": rd.get_absolute_url(), + "cites": list( + rd.cited_opinions.all().values_list( + "cited_opinion_id", flat=True + ) + ), + } + for rd in rds + ] + + def prepare(self, instance): + data = super().prepare(instance) + parties_prepared = self.prepare_parties(instance) + data["party_id"] = list(parties_prepared["party_id"]) + data["party"] = list(parties_prepared["party"]) + data["attorney_id"] = list(parties_prepared["attorney_id"]) + data["attorney"] = list(parties_prepared["attorney"]) + data["firm_id"] = list(parties_prepared["firm_id"]) + data["firm"] = list(parties_prepared["firm"]) + return data diff --git a/cl/search/es_indices.py b/cl/search/es_indices.py index 717a6abee9..bf129f0704 100644 --- a/cl/search/es_indices.py +++ b/cl/search/es_indices.py @@ -53,3 +53,12 @@ number_of_replicas=settings.ELASTICSEARCH_OPINION_NUMBER_OF_REPLICAS, analysis=settings.ELASTICSEARCH_DSL["analysis"], ) + + +# Define RECAP Nested elasticsearch index +recap_sweep_index = Index("recap_sweep") +recap_sweep_index.settings( + number_of_shards=settings.ELASTICSEARCH_RECAP_NUMBER_OF_SHARDS, + number_of_replicas=settings.ELASTICSEARCH_RECAP_NUMBER_OF_REPLICAS, + analysis=settings.ELASTICSEARCH_DSL["analysis"], +) diff --git a/cl/search/management/commands/cl_index_parent_and_child_docs.py 
b/cl/search/management/commands/cl_index_parent_and_child_docs.py index abb528a3b0..e184187168 100644 --- a/cl/search/management/commands/cl_index_parent_and_child_docs.py +++ b/cl/search/management/commands/cl_index_parent_and_child_docs.py @@ -342,6 +342,11 @@ def add_arguments(self, parser): action="store_true", help="Use this flag to only index documents missing in the index.", ) + parser.add_argument( + "--nested", + action="store_true", + help="Whether to perform a indexing of Nested documents.", + ) def handle(self, *args, **options): super().handle(*args, **options) @@ -475,6 +480,7 @@ def process_queryset( pk_offset = self.options["pk_offset"] document_type = self.options.get("document_type", None) missing = self.options.get("missing", False) + nested = self.options.get("nested", False) fields_map = {} if event_doc_type == EventTable.DOCKET: fields_map = recap_document_field_mapping["save"][Docket][ @@ -535,6 +541,7 @@ def process_queryset( search_type, document_type, testing_mode=testing_mode, + nested=nested, ).set(queue=queue).apply_async() case "remove_parent_and_child_docs_by_query": remove_parent_and_child_docs_by_query.si( diff --git a/cl/search/tasks.py b/cl/search/tasks.py index df7d337f26..db1a0c3651 100644 --- a/cl/search/tasks.py +++ b/cl/search/tasks.py @@ -45,6 +45,7 @@ OpinionDocument, PersonDocument, PositionDocument, + RECAPNestedDocument, ) from cl.search.models import ( SEARCH_TYPES, @@ -1071,6 +1072,7 @@ def index_parent_or_child_docs( search_type: str, document_type: str | None, testing_mode: bool = False, + nested: bool = False, ) -> None: """Index parent or child documents in Elasticsearch. @@ -1089,7 +1091,9 @@ def index_parent_or_child_docs( child_instances = QuerySet() match search_type: case SEARCH_TYPES.RECAP: - parent_es_document = DocketDocument + parent_es_document = ( + RECAPNestedDocument if nested else DocketDocument + ) child_es_document = ESRECAPDocument child_id_property = "RECAP" if document_type == "parent": diff --git a/cl/search/tests/tests_es_recap.py b/cl/search/tests/tests_es_recap.py index 389193b204..15f651a475 100644 --- a/cl/search/tests/tests_es_recap.py +++ b/cl/search/tests/tests_es_recap.py @@ -19,6 +19,7 @@ from cl.lib.elasticsearch_utils import ( build_es_main_query, + do_es_sweep_nested_query, fetch_es_results, merge_unavailable_fields_on_parent_document, set_results_highlights, @@ -46,9 +47,15 @@ DocketESResultSerializer, RECAPDocumentESResultSerializer, RECAPESResultSerializer, + RECAPNestedResultSerializer, ) from cl.search.api_views import SearchV4ViewSet -from cl.search.documents import ES_CHILD_ID, DocketDocument, ESRECAPDocument +from cl.search.documents import ( + ES_CHILD_ID, + DocketDocument, + ESRECAPDocument, + RECAPNestedDocument, +) from cl.search.factories import ( BankruptcyInformationFactory, CourtFactory, @@ -6672,3 +6679,63 @@ def test_recap_history_table_indexing(self) -> None: ) if keys: self.r.delete(*keys) + + +class RECAPSearchNestedIndexTest( + RECAPSearchAPICommonTests, ESIndexTestCase, TestCase +): + """ + RECAP Nested Index Tests + """ + + version_api = "v4" + skip_common_tests = False + + @classmethod + def setUpTestData(cls): + cls.rebuild_index("people_db.Person") + cls.rebuild_index("search.Docket") + cls.mock_date = now().replace(day=15, hour=0) + with time_machine.travel(cls.mock_date, tick=False): + super().setUpTestData() + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + document_type="parent", + testing_mode=True, + 
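+                # The --nested flag routes indexing into RECAPNestedDocument,
+                # which writes to the recap_sweep index.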
nested=True, + ) + + async def _test_api_results_count( + self, params, expected_count, field_name + ): + + search_query = RECAPNestedDocument.search() + results, total_hits = await sync_to_async(do_es_sweep_nested_query)( + search_query, + params, + ) + results = RECAPNestedResultSerializer(results, many=True).data + got = len(results) + self.assertEqual( + got, + expected_count, + msg="Did not get the right number of search results in API with %s " + "filter applied.\n" + "Expected: %s\n" + " Got: %s\n\n" + "Params were: %s" % (field_name, expected_count, got, params), + ) + return results + + async def test_cross_object_string_query(self) -> None: + """Confirm a cross-object string query return the right results.""" + + search_params = { + "type": SEARCH_TYPES.RECAP, + "q": f"id:{self.rd_api.pk} cause:(401 Civil) juryDemand:Plaintiff short_description:(Order Letter) plain_text:(shown in the API)", + } + + await self._test_api_results_count(search_params, 1, "API fields") From 2955b0ba5d7426fd62671e6d00c044818f2773c2 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Fri, 21 Jun 2024 20:35:54 -0600 Subject: [PATCH 02/33] fix(alerts): Changed sweep index approach to parent-child documents --- cl/lib/elasticsearch_utils.py | 291 +++----------- cl/lib/test_helpers.py | 62 ++- cl/search/api_serializers.py | 16 +- cl/search/constants.py | 33 +- cl/search/documents.py | 369 +----------------- cl/search/forms.py | 10 + .../cl_index_parent_and_child_docs.py | 13 +- cl/search/tasks.py | 20 +- cl/search/tests/tests_es_recap.py | 126 +++++- cl/tests/cases.py | 6 +- 10 files changed, 295 insertions(+), 651 deletions(-) diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 7f407441aa..3280abc449 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -58,6 +58,8 @@ PEOPLE_ES_HL_FIELDS, PEOPLE_ES_HL_KEYWORD_FIELDS, RELATED_PATTERN, + SEARCH_ALERTS_DOCKET_HL_FIELDS, + SEARCH_ALERTS_DOCKET_KEYWORDS_HL_FIELDS, SEARCH_ALERTS_ORAL_ARGUMENT_ES_HL_FIELDS, SEARCH_HL_TAG, SEARCH_OPINION_HL_FIELDS, @@ -69,7 +71,6 @@ SEARCH_RECAP_CHILD_HL_FIELDS, SEARCH_RECAP_CHILD_QUERY_FIELDS, SEARCH_RECAP_HL_FIELDS, - SEARCH_RECAP_NESTED_CHILD_QUERY_FIELDS, SEARCH_RECAP_PARENT_QUERY_FIELDS, api_child_highlight_map, ) @@ -889,6 +890,7 @@ def build_has_child_query( order_by: tuple[str, str] | None = None, child_highlighting: bool = True, default_current_date: datetime.date | None = None, + alerts: bool = False, ) -> QueryString: """Build a 'has_child' query. @@ -901,6 +903,7 @@ def build_has_child_query( :param child_highlighting: Whether highlighting should be enabled in child docs. :param default_current_date: The default current date to use for computing a stable date score across pagination in the V4 Search API. + :param alerts: If highlighting is being applied to search Alerts hits. :return: The 'has_child' query. 
""" @@ -917,8 +920,9 @@ def build_has_child_query( default_current_date=default_current_date, ) + hl_tag = ALERTS_HL_TAG if alerts else SEARCH_HL_TAG highlight_options, fields_to_exclude = build_highlights_dict( - highlighting_fields, SEARCH_HL_TAG, child_highlighting + highlighting_fields, hl_tag, child_highlighting ) inner_hits = { @@ -1068,7 +1072,7 @@ def build_es_base_query( cd: CleanData, child_highlighting: bool = True, api_version: Literal["v3", "v4"] | None = None, - nested_query: bool = False, + alerts: bool = False, ) -> tuple[Search, QueryString | None]: """Builds filters and fulltext_query based on the given cleaned data and returns an elasticsearch query. @@ -1077,7 +1081,7 @@ def build_es_base_query( :param cd: The cleaned data object containing the query and filters. :param child_highlighting: Whether highlighting should be enabled in child docs. :param api_version: Optional, the request API version. - :param nested_query: Whether to perform a nested query. + :param alerts: If highlighting is being applied to search Alerts hits. :return: A two-tuple, the Elasticsearch search query object and an ES QueryString for child documents, or None if there is no need to query child documents. @@ -1155,15 +1159,6 @@ def build_es_base_query( ], ) ) - nested_child_fields = SEARCH_RECAP_NESTED_CHILD_QUERY_FIELDS.copy() - nested_child_fields.extend( - add_fields_boosting( - cd, - [ - "description", - ], - ) - ) child_query_fields = {"recap_document": child_fields} parent_query_fields = SEARCH_RECAP_PARENT_QUERY_FIELDS.copy() parent_query_fields.extend( @@ -1175,21 +1170,14 @@ def build_es_base_query( ], ) ) - - if nested_query: - main_query, _ = build_full_nested_es_queries( - cd, - nested_child_fields, - parent_query_fields, - ) - else: - main_query, join_query = build_full_join_es_queries( - cd, - child_query_fields, - parent_query_fields, - child_highlighting=child_highlighting, - api_version=api_version, - ) + main_query, join_query = build_full_join_es_queries( + cd, + child_query_fields, + parent_query_fields, + child_highlighting=child_highlighting, + api_version=api_version, + alerts=alerts, + ) case SEARCH_TYPES.OPINION: str_query = cd.get("q", "") @@ -1300,7 +1288,7 @@ def build_child_docs_query( query for query in parent_filters if isinstance(query, QueryString) - and query.fields[0] in ["party", "attorney"] + and query.fields[0] in ["party", "attorney", "firm"] ] parties_has_parent_query = build_has_parent_parties_query(parties_filters) @@ -1475,7 +1463,15 @@ def add_es_highlighting( highlighting_fields = PEOPLE_ES_HL_FIELDS highlighting_keyword_fields = PEOPLE_ES_HL_KEYWORD_FIELDS case SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS: - highlighting_fields = SEARCH_RECAP_HL_FIELDS + highlighting_fields = ( + SEARCH_ALERTS_DOCKET_HL_FIELDS + if alerts + else SEARCH_RECAP_HL_FIELDS + ) + if alerts: + highlighting_keyword_fields = ( + SEARCH_ALERTS_DOCKET_KEYWORDS_HL_FIELDS + ) case SEARCH_TYPES.OPINION: highlighting_fields = SEARCH_OPINION_HL_FIELDS @@ -2006,14 +2002,11 @@ def fetch_es_results( return [], 0, error, None, None -def build_has_child_filters( - cd: CleanData, nested_query=False -) -> list[QueryString]: +def build_has_child_filters(cd: CleanData) -> list[QueryString]: """Builds Elasticsearch 'has_child' filters based on the given child type and CleanData. :param cd: The user input CleanedData. - :param nested_query: Whether to perform a nested query. :return: A list of QueryString objects containing the 'has_child' filters. 
""" @@ -2047,36 +2040,22 @@ def build_has_child_filters( attachment_number = cd.get("attachment_number", "") if available_only: - field = ( - "is_available" - if not nested_query - else "documents.is_available" - ) queries_list.extend( build_term_query( - field, + "is_available", available_only, ) ) if description: - field = ( - "description" if not nested_query else "documents.description" - ) - queries_list.extend(build_text_filter(field, description)) + queries_list.extend(build_text_filter("description", description)) if document_number: - field = ( - "document_number" - if not nested_query - else "documents.document_number" + queries_list.extend( + build_term_query("document_number", document_number) ) - queries_list.extend(build_term_query(field, document_number)) if attachment_number: - field = ( - "attachment_number" - if not nested_query - else "documents.attachment_number" + queries_list.extend( + build_term_query("attachment_number", attachment_number) ) - queries_list.extend(build_term_query(field, attachment_number)) return queries_list @@ -2133,6 +2112,7 @@ def build_join_es_filters(cd: CleanData) -> List: *build_text_filter("referredTo", cd.get("referred_to", "")), *build_text_filter("party", cd.get("party_name", "")), *build_text_filter("attorney", cd.get("atty_name", "")), + *build_text_filter("firm", cd.get("firm_name", "")), *build_daterange_query( "dateFiled", cd.get("filed_before", ""), @@ -2357,6 +2337,7 @@ def build_full_join_es_queries( mlt_query: Query | None = None, child_highlighting: bool = True, api_version: Literal["v3", "v4"] | None = None, + alerts: bool = False, ) -> tuple[QueryString | list, QueryString | None]: """Build a complete Elasticsearch query with both parent and child document conditions. @@ -2367,6 +2348,7 @@ def build_full_join_es_queries( :param mlt_query: the More Like This Query object. :param child_highlighting: Whether highlighting should be enabled in child docs. :param api_version: Optional, the request API version. + :param alerts: If highlighting is being applied to search Alerts hits. :return: An Elasticsearch QueryString object. """ @@ -2411,7 +2393,7 @@ def build_full_join_es_queries( query for query in parent_filters if isinstance(query, QueryString) - and query.fields[0] in ["party", "attorney"] + and query.fields[0] in ["party", "attorney", "firm"] ] has_parent_parties_filter = build_has_parent_parties_query( parties_filters @@ -2425,7 +2407,7 @@ def build_full_join_es_queries( query for query in parent_filters if not isinstance(query, QueryString) - or query.fields[0] not in ["party", "attorney"] + or query.fields[0] not in ["party", "attorney", "firm"] ] ) if parties_filters: @@ -2473,6 +2455,7 @@ def build_full_join_es_queries( get_function_score_sorting_key(cd, api_version), child_highlighting=child_highlighting, default_current_date=cd.get("request_date"), + alerts=alerts, ) if parties_filters and not has_child_query: @@ -2487,6 +2470,7 @@ def build_full_join_es_queries( SEARCH_RECAP_CHILD_HL_FIELDS, get_function_score_sorting_key(cd, api_version), default_current_date=cd.get("request_date"), + alerts=alerts, ) if has_child_query: @@ -3055,175 +3039,10 @@ def do_es_alert_estimation_query( return estimation_query.count() -def build_nested_child_query( - query: QueryString | str, - child_type: str, - child_hits_limit: int, - highlighting_fields: dict[str, int] | None = None, -) -> QueryString: - """Build a nested query. - - :param query: The Elasticsearch query string or QueryString object. 
- :param child_type: The type of the child document. - :param child_hits_limit: The maximum number of child hits to be returned. - :param highlighting_fields: List of fields to highlight in child docs. - :return: The 'has_child' query. - """ - - highlight_options, fields_to_exclude = build_highlights_dict( - highlighting_fields, SEARCH_HL_TAG - ) - inner_hits = { - "name": f"filter_query_inner_{child_type}", - "size": child_hits_limit, - "_source": { - "excludes": fields_to_exclude, - }, - } - if highlight_options: - inner_hits["highlight"] = highlight_options - - return Q( - "nested", - path="documents", - score_mode="max", - query=query, - inner_hits=inner_hits, - ) - - -def build_full_nested_es_queries( - cd: CleanData, - child_query_fields: list[str], - parent_query_fields: list[str], -) -> tuple[QueryString | list, QueryString | None]: - """Build a complete Elasticsearch query with both parent and nested - documents conditions. - - :param cd: The query CleanedData - :param child_query_fields: A dictionary mapping child fields document type. - :param parent_query_fields: A list of fields for the parent document. - :return: An Elasticsearch QueryString object. - """ - - q_should = [] - child_query = None - if cd["type"] in [ - SEARCH_TYPES.RECAP, - SEARCH_TYPES.DOCKETS, - SEARCH_TYPES.RECAP_DOCUMENT, - SEARCH_TYPES.OPINION, - SEARCH_TYPES.PEOPLE, - ]: - # Build child filters. - child_filters = build_has_child_filters(cd, nested_query=True) - # Copy the original child_filters before appending parent fields. - # For its use later in the parent filters. - child_filters_original = deepcopy(child_filters) - # Build child text query. - child_fields = [f"documents.{field}" for field in child_query_fields] - child_text_query = build_fulltext_query( - child_fields, cd.get("q", ""), only_queries=True - ) - - # Build parent filters. - parent_filters = build_join_es_filters(cd) - - # Build the child query based on child_filters and child child_text_query - match child_filters, child_text_query: - case [], []: - pass - case [], _: - child_query = Q( - "bool", - should=child_text_query, - minimum_should_match=1, - ) - case _, []: - child_query = Q( - "bool", - filter=child_filters, - ) - case _, _: - child_query = Q( - "bool", - filter=child_filters, - should=child_text_query, - minimum_should_match=1, - ) - - _, query_hits_limit = get_child_top_hits_limit(cd, cd["type"]) - has_child_query = None - if child_text_query or child_filters: - hl_fields = api_child_highlight_map.get((True, cd["type"]), {}) - has_child_query = build_nested_child_query( - child_query, - "recap_document", - query_hits_limit, - hl_fields, - ) - - if has_child_query: - q_should.append(has_child_query) - - # Build the parent filter and text queries. - string_query = build_fulltext_query( - parent_query_fields, cd.get("q", ""), only_queries=True - ) - - # If child filters are set, add a nested query as a filter to the - # parent query to exclude results without matching children. 
-        if child_filters_original:
-            parent_filters.append(
-                Q(
-                    "nested",
-                    path="documents",
-                    score_mode="max",
-                    query=Q("bool", filter=child_filters_original),
-                )
-            )
-        parent_query = None
-        match parent_filters, string_query:
-            case [], []:
-                pass
-            case [], _:
-                parent_query = Q(
-                    "bool",
-                    should=string_query,
-                    minimum_should_match=1,
-                )
-            case _, []:
-                parent_query = Q(
-                    "bool",
-                    filter=parent_filters,
-                )
-            case _, _:
-                parent_query = Q(
-                    "bool",
-                    filter=parent_filters,
-                    should=string_query,
-                    minimum_should_match=1,
-                )
-        if parent_query:
-            q_should.append(parent_query)
-
-    if not q_should:
-        return [], child_query
-
-    final_query = Q(
-        "bool",
-        should=q_should,
-    )
-    return (
-        final_query,
-        child_query,
-    )
-
-
 def do_es_sweep_nested_query(
     search_query: Search,
     cd: CleanData,
-) -> tuple[list[defaultdict] | None, int | None]:
+) -> tuple[list[Hit] | None, int | None]:
     """Build and execute an ES query for the daily RECAP sweep index.
 
     :param search_query: Elasticsearch DSL Search object.
     :param cd: The query CleanedData.
     :return: A two-tuple: the ES results and the total number of hits, or
     (None, None) if the search form did not validate.
@@ -3241,19 +3060,15 @@ def do_es_sweep_nested_query(
 
     hits = None
     try:
-        s, _ = build_es_base_query(
-            search_query,
-            cd,
-            True,
-            nested_query=True,
-        )
+        s, _ = build_es_base_query(search_query, cd, True, alerts=True)
     except (
         UnbalancedParenthesesQuery,
        UnbalancedQuotesQuery,
         BadProximityQuery,
     ) as e:
         raise ElasticBadRequestError(detail=e.message)
-    main_query = add_es_highlighting(s, cd, highlighting=True)
+    main_query = add_es_highlighting(s, cd, alerts=True)
+    main_query = main_query.sort(build_sort_results(cd))
     main_query = main_query.extra(from_=0, size=30)
     results = main_query.execute()
     if results:
@@ -3272,3 +3087,21 @@ def do_es_sweep_nested_query(
         result["child_docs"] = child_result_objects
 
     return results, hits
+
+
+def docket_field_matched(hit: Hit) -> bool:
+    """Determine whether highlighting matched a Docket field.
+
+    :param hit: The ES hit.
+    :return: True if the hit matched a Docket field. Otherwise, False.
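+
+    A hit counts as a Docket-field match when every key in its highlight
+    dict belongs to the Docket-level highlight field sets, so a hit with
+    no highlights at all also counts as a match.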
+ """ + + plain_hl = set(SEARCH_ALERTS_DOCKET_KEYWORDS_HL_FIELDS) + vector_hl = set(SEARCH_ALERTS_DOCKET_HL_FIELDS.keys()) + docket_hl = set() + if hasattr(hit.meta, "highlight"): + highlights = hit.meta.highlight.to_dict() + docket_hl = set([hl for hl in highlights.keys()]) + if docket_hl.issubset(plain_hl.union(vector_hl)): + return True + return False diff --git a/cl/lib/test_helpers.py b/cl/lib/test_helpers.py index 69976430f1..d7e1c49ebc 100644 --- a/cl/lib/test_helpers.py +++ b/cl/lib/test_helpers.py @@ -278,15 +278,19 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime: ), "court_id": lambda x: x["result"].docket_entry.docket.court.pk, "dateArgued": lambda x: ( - ( - x["result"].docket_entry.docket.date_argued.isoformat() - if x.get("V4") - else midnight_pt_test( - x["result"].docket_entry.docket.date_argued - ).isoformat() + x["dateArgued"] + if x.get("dateArgued") + else ( + ( + x["result"].docket_entry.docket.date_argued.isoformat() + if x.get("V4") + else midnight_pt_test( + x["result"].docket_entry.docket.date_argued + ).isoformat() + ) + if x["result"].docket_entry.docket.date_argued + else None ) - if x["result"].docket_entry.docket.date_argued - else None ), "dateFiled": lambda x: ( ( @@ -315,7 +319,11 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime: if x.get("docketNumber") else x["result"].docket_entry.docket.docket_number ), - "docket_id": lambda x: x["result"].docket_entry.docket_id, + "docket_id": lambda x: ( + x["docket_id"] + if x.get("docket_id") + else x["result"].docket_entry.docket_id + ), "jurisdictionType": lambda x: x[ "result" ].docket_entry.docket.jurisdiction_type, @@ -348,10 +356,14 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime: recap_type_v4_api_keys = docket_api_common_keys.copy() recap_type_v4_api_keys.update( { - "attorney": lambda x: list( - DocketDocument().prepare_parties(x["result"].docket_entry.docket)[ - "attorney" - ] + "attorney": lambda x: ( + x["attorney"] + if x.get("attorney") + else list( + DocketDocument().prepare_parties( + x["result"].docket_entry.docket + )["attorney"] + ) ), "attorney_id": lambda x: list( DocketDocument().prepare_parties(x["result"].docket_entry.docket)[ @@ -371,10 +383,14 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime: "docket_absolute_url": lambda x: x[ "result" ].docket_entry.docket.get_absolute_url(), - "firm": lambda x: list( - DocketDocument().prepare_parties(x["result"].docket_entry.docket)[ - "firm" - ] + "firm": lambda x: ( + x["firm"] + if x.get("firm") + else list( + DocketDocument().prepare_parties( + x["result"].docket_entry.docket + )["firm"] + ) ), "firm_id": lambda x: list( DocketDocument().prepare_parties(x["result"].docket_entry.docket)[ @@ -386,10 +402,14 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime: if x["result"].docket_entry.docket.pacer_case_id else "" ), - "party": lambda x: list( - DocketDocument().prepare_parties(x["result"].docket_entry.docket)[ - "party" - ] + "party": lambda x: ( + x["party"] + if x.get("party") + else list( + DocketDocument().prepare_parties( + x["result"].docket_entry.docket + )["party"] + ) ), "party_id": lambda x: list( DocketDocument().prepare_parties(x["result"].docket_entry.docket)[ diff --git a/cl/search/api_serializers.py b/cl/search/api_serializers.py index 7c72dd5a2d..f5abc46b93 100644 --- a/cl/search/api_serializers.py +++ b/cl/search/api_serializers.py @@ -528,6 +528,7 @@ class BaseDocketESResultSerializer(DocumentSerializer): attorney = NoneToListField(read_only=True, required=False) 
firm_id = NoneToListField(read_only=True, required=False) firm = NoneToListField(read_only=True, required=False) + docket_id = HighlightedField(read_only=True) class Meta: document = DocketDocument @@ -731,18 +732,3 @@ class Meta: "pacer_doc_id", "trustee_str", ) - - -class RECAPNestedResultSerializer( - RECAPMetaMixin, BaseDocketESResultSerializer -): - """The serializer class for RECAP search type results.""" - - recap_documents = BaseRECAPDocumentESResultSerializer( - many=True, read_only=True, source="child_docs" - ) - - class Meta(BaseDocketESResultSerializer.Meta): - exclude = BaseDocketESResultSerializer.Meta.exclude + ( - "docket_absolute_url", - ) diff --git a/cl/search/constants.py b/cl/search/constants.py index 0dedfddc3c..0457e53799 100644 --- a/cl/search/constants.py +++ b/cl/search/constants.py @@ -96,11 +96,6 @@ "chapter", "trustee_str", ] -SEARCH_RECAP_NESTED_CHILD_QUERY_FIELDS = [ - "short_description", - "plain_text", - "document_type", -] SEARCH_OPINION_QUERY_FIELDS = [ "court", "court_id", @@ -161,6 +156,34 @@ "suitNature": 0, } +SEARCH_ALERTS_DOCKET_HL_FIELDS = { + "assignedTo": 0, + "caseName": 0, + "cause": 0, + "court_citation_string": 0, + "docketNumber": 0, + "juryDemand": 0, + "referredTo": 0, + "suitNature": 0, + "party": 0, + "attorney": 0, + "firm": 0, +} + +SEARCH_ALERTS_DOCKET_KEYWORDS_HL_FIELDS = { + "docket_id", + "court_id", + "firm_id", + "assigned_to_id", + "referred_to_id", + "dateFiled", + "dateArgued", + "dateTerminated", + "jurisdictionType", + "chapter", + "trustee_str", +} + SEARCH_OPINION_HL_FIELDS = { "caseName": 0, "citation": 0, diff --git a/cl/search/documents.py b/cl/search/documents.py index 5fada32689..957f03034e 100644 --- a/cl/search/documents.py +++ b/cl/search/documents.py @@ -36,7 +36,6 @@ BankruptcyInformation, Citation, Docket, - DocketEntry, Opinion, OpinionCluster, ParentheticalGroup, @@ -791,7 +790,7 @@ class DocketBaseDocument(Document): timestamp = fields.DateField() # Docket Fields - docket_id = fields.IntegerField(attr="pk") + docket_id = fields.KeywordField(attr="pk") caseName = fields.TextField( analyzer="text_en_splitting_cl", term_vector="with_positions_offsets", @@ -1831,179 +1830,15 @@ def prepare_cluster_child(self, instance): @recap_sweep_index.document -class RECAPNestedDocument(Document): - # Docket Fields - docket_id = fields.IntegerField(attr="pk") - caseName = fields.TextField( - analyzer="text_en_splitting_cl", - term_vector="with_positions_offsets", - fields={ - "exact": fields.TextField( - analyzer="english_exact", - search_analyzer="search_analyzer_exact", - term_vector="with_positions_offsets", - ), - }, - search_analyzer="search_analyzer", - ) - case_name_full = fields.TextField( - attr="case_name_full", - analyzer="text_en_splitting_cl", - fields={ - "exact": fields.TextField( - attr="case_name_full", - analyzer="english_exact", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - ) - docketNumber = fields.TextField( - attr="docket_number", - analyzer="text_en_splitting_cl", - term_vector="with_positions_offsets", - fields={ - "exact": fields.TextField( - attr="docket_number", - analyzer="english_exact", - term_vector="with_positions_offsets", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - ) - suitNature = fields.TextField( - attr="nature_of_suit", - analyzer="text_en_splitting_cl", - term_vector="with_positions_offsets", - fields={ - "exact": fields.TextField( - attr="nature_of_suit", - analyzer="english_exact", - 
term_vector="with_positions_offsets", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - ) - cause = fields.TextField( - attr="cause", - analyzer="text_en_splitting_cl", - term_vector="with_positions_offsets", - fields={ - "exact": fields.TextField( - attr="cause", - analyzer="english_exact", - term_vector="with_positions_offsets", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - ) - juryDemand = fields.TextField( - attr="jury_demand", - analyzer="text_en_splitting_cl", - term_vector="with_positions_offsets", - fields={ - "exact": fields.TextField( - attr="jury_demand", - analyzer="english_exact", - term_vector="with_positions_offsets", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - ) - jurisdictionType = fields.TextField( - attr="jurisdiction_type", - analyzer="text_en_splitting_cl", - fields={ - "exact": fields.TextField( - attr="jurisdiction_type", - analyzer="english_exact", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - ) - dateArgued = fields.DateField(attr="date_argued") - dateFiled = fields.DateField(attr="date_filed") - dateTerminated = fields.DateField(attr="date_terminated") - assignedTo = fields.TextField( - analyzer="text_en_splitting_cl", - term_vector="with_positions_offsets", - fields={ - "exact": fields.TextField( - analyzer="english_exact", - term_vector="with_positions_offsets", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - ) - assigned_to_id = fields.KeywordField(attr="assigned_to.pk") - referredTo = fields.TextField( - analyzer="text_en_splitting_cl", - term_vector="with_positions_offsets", - fields={ - "exact": fields.TextField( - analyzer="english_exact", - term_vector="with_positions_offsets", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - ) - referred_to_id = fields.KeywordField(attr="referred_to.pk") - court = fields.TextField( - attr="court.full_name", - analyzer="text_en_splitting_cl", - fields={ - "exact": fields.TextField( - attr="court.full_name", - analyzer="english_exact", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - ) - court_id = fields.TextField( - attr="court.pk", - analyzer="text_en_splitting_cl", - fields={"raw": fields.KeywordField(attr="court.pk")}, - search_analyzer="search_analyzer", - ) - court_citation_string = fields.TextField( - attr="court.citation_string", - analyzer="text_en_splitting_cl", - search_analyzer="search_analyzer", - term_vector="with_positions_offsets", - ) - chapter = fields.TextField( - analyzer="text_en_splitting_cl", - search_analyzer="search_analyzer", - ) - trustee_str = fields.TextField( - analyzer="text_en_splitting_cl", - fields={ - "exact": fields.TextField( - analyzer="english_exact", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - ) - date_created = fields.DateField(attr="date_created") - pacer_case_id = fields.KeywordField(attr="pacer_case_id") - - # Parties - party_id = fields.ListField(fields.IntegerField(multi=True)) +class DocketSweepDocument(DocketDocument): party = fields.ListField( fields.TextField( analyzer="text_en_splitting_cl", + term_vector="with_positions_offsets", fields={ "exact": fields.TextField( analyzer="english_exact", + term_vector="with_positions_offsets", search_analyzer="search_analyzer_exact", ), }, @@ -2011,13 +1846,14 @@ class 
RECAPNestedDocument(Document): multi=True, ) ) - attorney_id = fields.ListField(fields.IntegerField(multi=True)) attorney = fields.ListField( fields.TextField( analyzer="text_en_splitting_cl", + term_vector="with_positions_offsets", fields={ "exact": fields.TextField( analyzer="english_exact", + term_vector="with_positions_offsets", search_analyzer="search_analyzer_exact", ), }, @@ -2025,13 +1861,14 @@ class RECAPNestedDocument(Document): multi=True, ) ) - firm_id = fields.ListField(fields.IntegerField(multi=True)) firm = fields.ListField( fields.TextField( analyzer="text_en_splitting_cl", + term_vector="with_positions_offsets", fields={ "exact": fields.TextField( analyzer="english_exact", + term_vector="with_positions_offsets", search_analyzer="search_analyzer_exact", ), }, @@ -2040,193 +1877,13 @@ class RECAPNestedDocument(Document): ) ) - # RECAPDocument fields: - documents = fields.NestedField( - properties={ - "id": fields.IntegerField(attr="pk"), - "docket_entry_id": fields.IntegerField(attr="docket_entry.pk"), - "description": fields.TextField( - attr="docket_entry.description", - analyzer="text_en_splitting_cl", - term_vector="with_positions_offsets", - fields={ - "exact": fields.TextField( - attr="docket_entry.description", - term_vector="with_positions_offsets", - analyzer="english_exact", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - ), - "entry_number": fields.LongField(attr="docket_entry.entry_number"), - "entry_date_filed": fields.DateField( - attr="docket_entry.date_filed" - ), - "short_description": fields.TextField( - attr="description", - analyzer="text_en_splitting_cl", - term_vector="with_positions_offsets", - fields={ - "exact": fields.TextField( - attr="description", - analyzer="english_exact", - term_vector="with_positions_offsets", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - ), - "document_type": fields.TextField( - analyzer="text_en_splitting_cl", - term_vector="with_positions_offsets", - fields={ - "exact": fields.TextField( - analyzer="english_exact", - term_vector="with_positions_offsets", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - ), - "document_number": fields.LongField(), - "pacer_doc_id": fields.KeywordField(attr="pacer_doc_id"), - "plain_text": fields.TextField( - analyzer="text_en_splitting_cl", - term_vector="with_positions_offsets", - fields={ - "exact": fields.TextField( - analyzer="english_exact", - term_vector="with_positions_offsets", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - ), - "attachment_number": fields.IntegerField(attr="attachment_number"), - "is_available": fields.BooleanField(attr="is_available"), - "page_count": fields.IntegerField(attr="page_count"), - "filepath_local": fields.KeywordField(index=False), - "absolute_url": fields.KeywordField(index=False), - "cites": fields.ListField( - fields.IntegerField(multi=True), - ), - } - ) - - # Meta - timestamp = fields.DateField() - class Django: model = Docket ignore_signals = True - def prepare_caseName(self, instance): - return best_case_name(instance) - - def prepare_assignedTo(self, instance): - if instance.assigned_to: - return instance.assigned_to.name_full - elif instance.assigned_to_str: - return instance.assigned_to_str - - def prepare_referredTo(self, instance): - if instance.referred_to: - return instance.referred_to.name_full - elif instance.referred_to_str: - return instance.referred_to_str - - 
def prepare_chapter(self, instance): - if BankruptcyInformation.objects.filter(docket=instance).exists(): - return instance.bankruptcy_information.chapter - - def prepare_trustee_str(self, instance): - if BankruptcyInformation.objects.filter(docket=instance).exists(): - return instance.bankruptcy_information.trustee_str - - def prepare_docket_child(self, instance): - return "docket" - - def prepare_docket_absolute_url(self, instance): - return instance.get_absolute_url() - def prepare_parties(self, instance): - out = { - "party_id": set(), - "party": set(), - "attorney_id": set(), - "attorney": set(), - "firm_id": set(), - "firm": set(), - } - - # Extract only required parties values. - party_values = instance.parties.values_list("pk", "name") - for pk, name in party_values.iterator(): - out["party_id"].add(pk) - out["party"].add(name) - - # Extract only required attorney values. - atty_values = ( - Attorney.objects.filter(roles__docket=instance) - .distinct() - .values_list("pk", "name") - ) - for pk, name in atty_values.iterator(): - out["attorney_id"].add(pk) - out["attorney"].add(name) - - # Extract only required firm values. - firms_values = ( - AttorneyOrganization.objects.filter( - attorney_organization_associations__docket=instance - ) - .distinct() - .values_list("pk", "name") - ) - for pk, name in firms_values.iterator(): - out["firm_id"].add(pk) - out["firm"].add(name) - - return out - - def prepare_documents(self, instance): - rds = RECAPDocument.objects.filter(docket_entry__docket=instance) - return [ - { - "id": rd.pk, - "docket_entry_id": rd.docket_entry_id, - "description": rd.docket_entry.description, - "entry_number": rd.docket_entry.entry_number, - "entry_date_filed": rd.docket_entry.date_filed, - "short_description": rd.description, - "document_type": rd.get_document_type_display(), - "document_number": rd.document_number or None, - "pacer_doc_id": rd.pacer_doc_id, - "plain_text": escape(rd.plain_text.translate(null_map)), - "attachment_number": rd.attachment_number, - "is_available": rd.is_available, - "page_count": rd.page_count, - "filepath_local": ( - rd.filepath_local.name if rd.filepath_local else None - ), - "absolute_url": rd.get_absolute_url(), - "cites": list( - rd.cited_opinions.all().values_list( - "cited_opinion_id", flat=True - ) - ), - } - for rd in rds - ] - - def prepare(self, instance): - data = super().prepare(instance) - parties_prepared = self.prepare_parties(instance) - data["party_id"] = list(parties_prepared["party_id"]) - data["party"] = list(parties_prepared["party"]) - data["attorney_id"] = list(parties_prepared["attorney_id"]) - data["attorney"] = list(parties_prepared["attorney"]) - data["firm_id"] = list(parties_prepared["firm_id"]) - data["firm"] = list(parties_prepared["firm"]) - return data +@recap_sweep_index.document +class ESRECAPSweepDocument(ESRECAPDocument): + class Django: + model = RECAPDocument + ignore_signals = True diff --git a/cl/search/forms.py b/cl/search/forms.py index 7ff40a7f3e..7dcd91eaeb 100644 --- a/cl/search/forms.py +++ b/cl/search/forms.py @@ -221,6 +221,16 @@ class SearchForm(forms.Form): ), ) atty_name.as_str_types = [SEARCH_TYPES.RECAP] + firm_name = forms.CharField( + required=False, + label="Firm Name", + widget=forms.TextInput( + attrs={ + "class": "external-input form-control", + "autocomplete": "off", + }, + ), + ) # # Oral argument fields diff --git a/cl/search/management/commands/cl_index_parent_and_child_docs.py b/cl/search/management/commands/cl_index_parent_and_child_docs.py index 
e184187168..c4edec4ba0 100644 --- a/cl/search/management/commands/cl_index_parent_and_child_docs.py +++ b/cl/search/management/commands/cl_index_parent_and_child_docs.py @@ -343,9 +343,9 @@ def add_arguments(self, parser): help="Use this flag to only index documents missing in the index.", ) parser.add_argument( - "--nested", + "--sweep-index", action="store_true", - help="Whether to perform a indexing of Nested documents.", + help="Whether to perform an indexing for the sweep index.", ) def handle(self, *args, **options): @@ -480,7 +480,7 @@ def process_queryset( pk_offset = self.options["pk_offset"] document_type = self.options.get("document_type", None) missing = self.options.get("missing", False) - nested = self.options.get("nested", False) + sweep_index = self.options.get("sweep_index", False) fields_map = {} if event_doc_type == EventTable.DOCKET: fields_map = recap_document_field_mapping["save"][Docket][ @@ -532,7 +532,10 @@ def process_queryset( match task_to_use: case "index_parent_and_child_docs": index_parent_and_child_docs.si( - chunk, search_type, testing_mode=testing_mode + chunk, + search_type, + testing_mode=testing_mode, + sweep_index=sweep_index, ).set(queue=queue).apply_async() case "index_parent_or_child_docs": @@ -541,7 +544,7 @@ def process_queryset( search_type, document_type, testing_mode=testing_mode, - nested=nested, + sweep_index=sweep_index, ).set(queue=queue).apply_async() case "remove_parent_and_child_docs_by_query": remove_parent_and_child_docs_by_query.si( diff --git a/cl/search/tasks.py b/cl/search/tasks.py index db1a0c3651..38bb8cdbe1 100644 --- a/cl/search/tasks.py +++ b/cl/search/tasks.py @@ -40,12 +40,13 @@ ES_CHILD_ID, AudioDocument, DocketDocument, + DocketSweepDocument, ESRECAPDocument, + ESRECAPSweepDocument, OpinionClusterDocument, OpinionDocument, PersonDocument, PositionDocument, - RECAPNestedDocument, ) from cl.search.models import ( SEARCH_TYPES, @@ -967,6 +968,7 @@ def index_parent_and_child_docs( instance_ids: list[int], search_type: str, testing_mode: bool = False, + sweep_index: bool = False, ) -> None: """Index parent and child documents in Elasticsearch. @@ -986,8 +988,12 @@ def index_parent_and_child_docs( child_es_document = PositionDocument child_id_property = "POSITION" case SEARCH_TYPES.RECAP: - parent_es_document = DocketDocument - child_es_document = ESRECAPDocument + parent_es_document = ( + DocketSweepDocument if sweep_index else DocketDocument + ) + child_es_document = ( + ESRECAPSweepDocument if sweep_index else ESRECAPDocument + ) child_id_property = "RECAP" case SEARCH_TYPES.OPINION: parent_es_document = OpinionClusterDocument @@ -1072,7 +1078,7 @@ def index_parent_or_child_docs( search_type: str, document_type: str | None, testing_mode: bool = False, - nested: bool = False, + sweep_index: bool = False, ) -> None: """Index parent or child documents in Elasticsearch. 
@@ -1092,9 +1098,11 @@ def index_parent_or_child_docs( match search_type: case SEARCH_TYPES.RECAP: parent_es_document = ( - RECAPNestedDocument if nested else DocketDocument + DocketSweepDocument if sweep_index else DocketDocument + ) + child_es_document = ( + ESRECAPSweepDocument if sweep_index else ESRECAPDocument ) - child_es_document = ESRECAPDocument child_id_property = "RECAP" if document_type == "parent": parent_instances = Docket.objects.filter(pk__in=instance_ids) diff --git a/cl/search/tests/tests_es_recap.py b/cl/search/tests/tests_es_recap.py index 15f651a475..2cc46a3842 100644 --- a/cl/search/tests/tests_es_recap.py +++ b/cl/search/tests/tests_es_recap.py @@ -20,6 +20,7 @@ from cl.lib.elasticsearch_utils import ( build_es_main_query, do_es_sweep_nested_query, + docket_field_matched, fetch_es_results, merge_unavailable_fields_on_parent_document, set_results_highlights, @@ -47,14 +48,13 @@ DocketESResultSerializer, RECAPDocumentESResultSerializer, RECAPESResultSerializer, - RECAPNestedResultSerializer, ) from cl.search.api_views import SearchV4ViewSet from cl.search.documents import ( ES_CHILD_ID, DocketDocument, + DocketSweepDocument, ESRECAPDocument, - RECAPNestedDocument, ) from cl.search.factories import ( BankruptcyInformationFactory, @@ -6681,11 +6681,11 @@ def test_recap_history_table_indexing(self) -> None: self.r.delete(*keys) -class RECAPSearchNestedIndexTest( - RECAPSearchAPICommonTests, ESIndexTestCase, TestCase +class RECAPSearchSweepIndexTest( + RECAPSearchAPICommonTests, ESIndexTestCase, TestCase, V4SearchAPIAssertions ): """ - RECAP Nested Index Tests + RECAP Sweep Index Tests """ version_api = "v4" @@ -6703,21 +6703,20 @@ def setUpTestData(cls): search_type=SEARCH_TYPES.RECAP, queue="celery", pk_offset=0, - document_type="parent", testing_mode=True, - nested=True, + sweep_index=True, ) async def _test_api_results_count( self, params, expected_count, field_name ): - search_query = RECAPNestedDocument.search() + search_query = DocketSweepDocument.search() results, total_hits = await sync_to_async(do_es_sweep_nested_query)( search_query, params, ) - results = RECAPNestedResultSerializer(results, many=True).data + results = RECAPESResultSerializer(results, many=True).data got = len(results) self.assertEqual( got, @@ -6730,12 +6729,113 @@ async def _test_api_results_count( ) return results - async def test_cross_object_string_query(self) -> None: - """Confirm a cross-object string query return the right results.""" + async def test_cross_object_string_query_and_hl(self) -> None: + """Confirm a cross-object string query return the right results and + highlighting is properly applied. + """ + + # Docket-only query HL + search_params = { + "type": SEARCH_TYPES.RECAP, + "q": f"id:{self.rd_api.pk} cause:(401 Civil) " + f"court_citation_string:Appeals juryDemand:Plaintiff " + f"docket_id:{self.rd_api.docket_entry.docket.pk} " + f"dateArgued:[2022-05-19T00:00:00Z TO 2022-05-21T00:00:00Z]", + "assigned_to": "George", + "referred_to": "George", + "case_name": "America vs API", + "docket_number": "1:24-bk-0000", + "nature_of_suit": "569", + "party_name": "Defendant John Doe", + "atty_name": "John Doe", + "firm_name": "Associates America", + } + + # RECAP Search type HL disabled. 
+ r = await self._test_api_results_count(search_params, 1, "API fields") + keys_count = len(r[0]) + self.assertEqual(keys_count, len(recap_type_v4_api_keys)) + rd_keys_count = len(r[0]["recap_documents"][0]) + self.assertEqual(rd_keys_count, len(recap_document_v4_api_keys)) + + content_to_compare = { + "result": self.rd_api, + "V4": True, + "assignedTo": "George Doe II", + "caseName": "America vs API Lorem", + "cause": "401 Civil", + "court_citation_string": "Appeals. CA9.", + "docketNumber": "1:24-bk-0000", + "juryDemand": "Plaintiff", + "referredTo": "George Doe II", + "suitNature": "569", + "party": [ + "Defendant John Doe" + ], + "firm": ["Associates America"], + "attorney": ["John Doe"], + "docket_id": f"{self.rd_api.docket_entry.docket.pk}", + "dateArgued": f"2022-05-19", + } + await self._test_api_fields_content( + r, + content_to_compare, + recap_type_v4_api_keys, + recap_document_v4_api_keys, + v4_recap_meta_keys, + ) + + search_params = { + "type": SEARCH_TYPES.RECAP, + "order_by": "dateFiled desc", + } + # Match all query RECAP Search type HL enabled, get snippet from ES. + with override_settings(NO_MATCH_HL_SIZE=50): + r = await self._test_api_results_count( + search_params, 5, "API fields" + ) + content_to_compare = { + "result": self.rd_2, + "snippet": "Mauris iaculis, leo sit amet hendrerit vehicula, Maecenas", + "V4": True, + } + await self._test_api_fields_content( + r, + content_to_compare, + recap_type_v4_api_keys, + recap_document_v4_api_keys, + v4_recap_meta_keys, + ) + + async def test_query_matched_docket_field(self) -> None: + # Docket-only query HL search_params = { "type": SEARCH_TYPES.RECAP, - "q": f"id:{self.rd_api.pk} cause:(401 Civil) juryDemand:Plaintiff short_description:(Order Letter) plain_text:(shown in the API)", + "q": f"id:{self.rd_api.pk} cause:(401 Civil) " + f"court_citation_string:Appeals juryDemand:Plaintiff " + f"docket_id:{self.rd_api.docket_entry.docket.pk} ", } - await self._test_api_results_count(search_params, 1, "API fields") + search_query = DocketSweepDocument.search() + results, total_hits = await sync_to_async(do_es_sweep_nested_query)( + search_query, + search_params, + ) + d_field_matched = docket_field_matched(results[0]) + self.assertEqual(d_field_matched, True) + + # RECAPDocument-only query HL + search_params = { + "type": SEARCH_TYPES.RECAP, + "q": f"id:{self.rd_api.pk} short_description:(Order Letter) plain_text:(shown in the API)", + "description": "MOTION for Leave", + "document_number": "2", + } + search_query = DocketSweepDocument.search() + results, total_hits = await sync_to_async(do_es_sweep_nested_query)( + search_query, + search_params, + ) + d_field_matched = docket_field_matched(results[0]) + self.assertEqual(d_field_matched, False) diff --git a/cl/tests/cases.py b/cl/tests/cases.py index b7bfd2777a..42d025e918 100644 --- a/cl/tests/cases.py +++ b/cl/tests/cases.py @@ -10,6 +10,7 @@ from django_elasticsearch_dsl.registries import registry from lxml import etree from rest_framework.test import APITestCase +from rest_framework.utils.serializer_helpers import ReturnList from cl.lib.redis_utils import get_redis_interface from cl.search.models import SEARCH_TYPES @@ -267,7 +268,10 @@ async def _test_api_fields_content( get_expected_value, ) in fields_to_compare.items(): with self.subTest(field=field): - parent_document = api_response.data["results"][0] + if isinstance(api_response, ReturnList): + parent_document = api_response[0] + else: + parent_document = api_response.data["results"][0] actual_value = 
parent_document.get(field) if field in ["recap_documents", "opinions", "positions"]: child_document = actual_value[0] From 9307b77554b5ed6b090d3ab3503daf5a46492985 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Mon, 24 Jun 2024 20:47:20 -0600 Subject: [PATCH 03/33] fix(alerts): Added cl_send_recap_alerts command - Test filter out queries and hits based on fields that matched. --- .../commands/cl_send_recap_alerts.py | 112 +++++++++++ cl/alerts/tests/__init__.py | 0 cl/alerts/{ => tests}/tests.py | 0 cl/alerts/tests/tests_recap_alerts.py | 190 ++++++++++++++++++ cl/alerts/utils.py | 53 ++++- cl/lib/elasticsearch_utils.py | 58 +----- cl/lib/test_helpers.py | 62 ++---- cl/search/api_serializers.py | 1 - cl/search/constants.py | 53 +++-- cl/search/documents.py | 48 +---- cl/search/tests/tests_es_recap.py | 169 +--------------- 11 files changed, 412 insertions(+), 334 deletions(-) create mode 100644 cl/alerts/management/commands/cl_send_recap_alerts.py create mode 100644 cl/alerts/tests/__init__.py rename cl/alerts/{ => tests}/tests.py (100%) create mode 100644 cl/alerts/tests/tests_recap_alerts.py diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py new file mode 100644 index 0000000000..cefefb1cfb --- /dev/null +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -0,0 +1,112 @@ +import traceback +import datetime + +from asgiref.sync import async_to_sync +from django.contrib.auth.models import User +from django.http import QueryDict +from django.utils.timezone import now +from elasticsearch.exceptions import RequestError, TransportError + +from cl.lib.command_utils import VerboseCommand, logger +from cl.lib.elasticsearch_utils import do_es_sweep_alert_query +from cl.search.documents import DocketSweepDocument +from cl.search.models import SEARCH_TYPES +from cl.stats.utils import tally_stat +from cl.alerts.tasks import send_search_alert_emails +from cl.alerts.models import Alert +from cl.search.exception import ( + BadProximityQuery, + UnbalancedParenthesesQuery, + UnbalancedQuotesQuery, +) +from cl.alerts.utils import recap_document_hl_matched, query_includes_rd_field + + +def index_daily_recap_documents(): + # TODO implement + pass + +def has_rd_hit_been_triggered(): + # TODO implement + return False + +def has_docket_hit_been_triggered(): + # TODO implement + return True + +def query_and_send_alerts(rate): + alert_users = User.objects.filter(alerts__rate=rate).distinct() + alerts_sent_count = 0 + now_time = datetime.datetime.now() + for user in alert_users: + alerts = user.alerts.filter(rate=rate) + logger.info(f"Running alerts for user '{user}': {alerts}") + + hits = [] + alerts_to_update = [] + for alert in alerts: + search_params = QueryDict(alert.query.encode(), mutable=True) + includes_rd_fields = query_includes_rd_field(search_params) + + try: + search_query = DocketSweepDocument.search() + results, total_hits = do_es_sweep_alert_query( + search_query, + search_params, + ) + except (UnbalancedParenthesesQuery, + UnbalancedQuotesQuery, + BadProximityQuery,TransportError, ConnectionError, RequestError): + traceback.print_exc() + logger.info( + f"Search for this alert failed: {alert.query}\n" + ) + continue + + alerts_to_update.append(alert.pk) + if len(results) > 0: + search_type = search_params.get("type", SEARCH_TYPES.OPINION) + results_to_send = [] + for hit in results: + if not includes_rd_fields: + rds_to_send = [rd_hit for rd_hit in hit["child_docs"] + if not recap_document_hl_matched( + rd_hit) and 
not has_rd_hit_been_triggered()] + if rds_to_send: + hit["child_docs"] = rds_to_send + results_to_send.append(hit) + + hits.append( + [alert, search_type, results_to_send, len(results_to_send)] + ) + alert.query_run = search_params.urlencode() + alert.date_last_hit = now() + alert.save() + + if hits: + send_search_alert_emails.delay([(user.pk, hits)]) + alerts_sent_count += 1 + + # Update Alert's date_last_hit in bulk. + Alert.objects.filter(id__in=alerts_to_update).update( + date_last_hit=now_time + ) + async_to_sync(tally_stat)(f"alerts.sent.{rate}", inc=alerts_sent_count) + logger.info(f"Sent {alerts_sent_count} {rate} email alerts.") + + +def query_and_schedule_wly_and_mly_alerts(): + # TODO implement + pass + + +class Command(VerboseCommand): + help = "Send RECAP Search Alerts." + + def handle(self, *args, **options): + super().handle(*args, **options) + + index_daily_recap_documents() + query_and_send_alerts(Alert.REAL_TIME) + query_and_send_alerts(Alert.DAILY) + query_and_schedule_wly_and_mly_alerts() diff --git a/cl/alerts/tests/__init__.py b/cl/alerts/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cl/alerts/tests.py b/cl/alerts/tests/tests.py similarity index 100% rename from cl/alerts/tests.py rename to cl/alerts/tests/tests.py diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py new file mode 100644 index 0000000000..4b00408d2c --- /dev/null +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -0,0 +1,190 @@ +from unittest import mock + +import time_machine +from asgiref.sync import sync_to_async +from django.core import mail +from django.core.management import call_command +from django.utils.timezone import now + +from cl.alerts.factories import AlertFactory +from cl.alerts.models import SEARCH_TYPES, Alert +from cl.alerts.utils import query_includes_rd_field, recap_document_hl_matched +from cl.api.factories import WebhookFactory +from cl.api.models import WebhookEventType +from cl.donate.models import NeonMembership +from cl.lib.elasticsearch_utils import do_es_sweep_alert_query +from cl.lib.test_helpers import RECAPSearchTestCase +from cl.search.documents import DocketSweepDocument +from cl.tests.cases import ESIndexTestCase, TestCase +from cl.tests.utils import MockResponse +from cl.users.factories import UserProfileWithParentsFactory + + +class RECAPAlertsSweepIndexTest( + RECAPSearchTestCase, ESIndexTestCase, TestCase +): + """ + RECAP Alerts Sweep Index Tests + """ + + @classmethod + def setUpTestData(cls): + cls.rebuild_index("people_db.Person") + cls.rebuild_index("search.Docket") + cls.mock_date = now().replace(day=15, hour=0) + with time_machine.travel(cls.mock_date, tick=False): + super().setUpTestData() + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + sweep_index=True, + ) + + cls.user_profile = UserProfileWithParentsFactory() + NeonMembership.objects.create( + level=NeonMembership.LEGACY, user=cls.user_profile.user + ) + cls.webhook_enabled = WebhookFactory( + user=cls.user_profile.user, + event_type=WebhookEventType.SEARCH_ALERT, + url="https://example.com/", + enabled=True, + ) + cls.search_alert = AlertFactory( + user=cls.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Docket Only", + query='q="401 Civil"&type=r', + ) + cls.search_alert_2 = AlertFactory( + user=cls.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert RECAP Only", + query='q="Mauris iaculis, leo sit amet hendrerit 
vehicula"&type=r', + ) + cls.search_alert_3 = AlertFactory( + user=cls.user_profile.user, + rate=Alert.DAILY, + name="Test Cross object", + query="q=SUBPOENAS SERVED OFF Mauris iaculis&type=r", + ) + + async def test_recap_document_hl_matched(self) -> None: + """Test recap_document_hl_matched method that determines weather a hit + contains RECAPDocument HL fields.""" + # Docket-only query + search_params = { + "type": SEARCH_TYPES.RECAP, + "q": '"401 Civil"', + } + search_query = DocketSweepDocument.search() + results, total_hits = await sync_to_async(do_es_sweep_alert_query)( + search_query, + search_params, + ) + docket_result = results[0] + for rd in docket_result["child_docs"]: + rd_field_matched = recap_document_hl_matched(rd) + self.assertEqual(rd_field_matched, False) + + # RECAPDocument-only query + search_params = { + "type": SEARCH_TYPES.RECAP, + "q": '"Mauris iaculis, leo sit amet hendrerit vehicula"', + } + search_query = DocketSweepDocument.search() + results, total_hits = await sync_to_async(do_es_sweep_alert_query)( + search_query, + search_params, + ) + docket_result = results[0] + for rd in docket_result["child_docs"]: + rd_field_matched = recap_document_hl_matched(rd) + self.assertEqual(rd_field_matched, True) + + # Cross-object query + search_params = { + "type": SEARCH_TYPES.RECAP, + "q": "SUBPOENAS SERVED OFF Mauris iaculis", + } + search_query = DocketSweepDocument.search() + results, total_hits = await sync_to_async(do_es_sweep_alert_query)( + search_query, + search_params, + ) + docket_result = results[0] + for rd in docket_result["child_docs"]: + rd_field_matched = recap_document_hl_matched(rd) + self.assertEqual(rd_field_matched, True) + + async def test_query_includes_rd_field(self) -> None: + """Test query_includes_rd_field method that checks if a query + includes any indexed fields in the query string or filters specific to + RECAP Documents. + """ + + # Docket-only query + search_params = { + "type": SEARCH_TYPES.RECAP, + "q": '"401 Civil"', + } + self.assertEqual(query_includes_rd_field(search_params), False) + + # RECAPDocument-only query + search_params = { + "type": SEARCH_TYPES.RECAP, + "q": 'description:"lorem ipsum"', + } + self.assertEqual(query_includes_rd_field(search_params), True) + + # Cross-object query + search_params = { + "type": SEARCH_TYPES.RECAP, + "q": 'case_name:"American v." description:"lorem ipsum"', + } + self.assertEqual(query_includes_rd_field(search_params), True) + + # Docket-only query + search_params = { + "type": SEARCH_TYPES.RECAP, + "q": "", + "case_name": "SUBPOENAS", + } + self.assertEqual(query_includes_rd_field(search_params), False) + + # RECAPDocument-only query + search_params = { + "type": SEARCH_TYPES.RECAP, + "q": "", + "description": "Lorem", + } + self.assertEqual(query_includes_rd_field(search_params), True) + + # Cross-object query + search_params = { + "type": SEARCH_TYPES.RECAP, + "q": "", + "case_name": "SUBPOENAS", + "document_number": 1, + } + self.assertEqual(query_includes_rd_field(search_params), True) + + def test_filter_out_alerts_to_send(self) -> None: + """Test RECAP alerts hit can be properly filtered out according to + their query and hits matched conditions. + """ + + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts") + + self.assertEqual( + len(mail.outbox), 2, msg="Outgoing emails don't match." 
+ ) diff --git a/cl/alerts/utils.py b/cl/alerts/utils.py index d287b3627a..408ae13da0 100644 --- a/cl/alerts/utils.py +++ b/cl/alerts/utils.py @@ -4,7 +4,7 @@ from django.conf import settings from django.http import QueryDict from elasticsearch_dsl import Q, Search -from elasticsearch_dsl.response import Response +from elasticsearch_dsl.response import Hit, Response from cl.alerts.models import ( SCHEDULED_ALERT_HIT_STATUS, @@ -14,9 +14,15 @@ ) from cl.lib.command_utils import logger from cl.lib.elasticsearch_utils import add_es_highlighting +from cl.lib.types import CleanData +from cl.search.constants import ( + ALERTS_HL_TAG, + SEARCH_RECAP_CHILD_HL_FIELDS, + recap_document_filters, + recap_document_indexed_fields, +) from cl.search.documents import AudioPercolator from cl.search.models import SEARCH_TYPES, Docket -from cl.users.models import UserProfile @dataclass @@ -138,3 +144,46 @@ def alert_hits_limit_reached(alert_pk: int, user_pk: int) -> bool: ) return True return False + + +def recap_document_hl_matched(rd_hit: Hit) -> bool: + """Determine whether HL matched a RECAPDocument text field. + + :param rd_hit: The ES hit. + :return: True if the hit matched a RECAPDocument field. Otherwise, False. + """ + + matched_rd_hl = set() + rd_hl_fields = set(SEARCH_RECAP_CHILD_HL_FIELDS.keys()) + if hasattr(rd_hit, "highlight"): + highlights = rd_hit.highlight.to_dict() + matched_rd_hl.update( + hl_key + for hl_key, hl_value in highlights.items() + for hl in hl_value + if f"<{ALERTS_HL_TAG}>" in hl + ) + if matched_rd_hl and matched_rd_hl.issubset(rd_hl_fields): + return True + return False + + +def query_includes_rd_field(query_params: CleanData) -> bool: + """Determine whether the query includes any indexed fields in the query + string or filters specific to RECAP Documents. + + :param query_params: The query parameters. + :return: True if any recap document fields or filters are included in the + query, otherwise False. 
+ """ + + query_string = query_params.get("q", "") + for rd_field in recap_document_indexed_fields: + if f"{rd_field}:" in query_string: + return True + + for rd_filter in recap_document_filters: + if query_params.get(rd_filter, ""): + return True + + return False diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 3280abc449..b4eab7c64b 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -58,8 +58,6 @@ PEOPLE_ES_HL_FIELDS, PEOPLE_ES_HL_KEYWORD_FIELDS, RELATED_PATTERN, - SEARCH_ALERTS_DOCKET_HL_FIELDS, - SEARCH_ALERTS_DOCKET_KEYWORDS_HL_FIELDS, SEARCH_ALERTS_ORAL_ARGUMENT_ES_HL_FIELDS, SEARCH_HL_TAG, SEARCH_OPINION_HL_FIELDS, @@ -1288,7 +1286,7 @@ def build_child_docs_query( query for query in parent_filters if isinstance(query, QueryString) - and query.fields[0] in ["party", "attorney", "firm"] + and query.fields[0] in ["party", "attorney"] ] parties_has_parent_query = build_has_parent_parties_query(parties_filters) @@ -1463,15 +1461,7 @@ def add_es_highlighting( highlighting_fields = PEOPLE_ES_HL_FIELDS highlighting_keyword_fields = PEOPLE_ES_HL_KEYWORD_FIELDS case SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS: - highlighting_fields = ( - SEARCH_ALERTS_DOCKET_HL_FIELDS - if alerts - else SEARCH_RECAP_HL_FIELDS - ) - if alerts: - highlighting_keyword_fields = ( - SEARCH_ALERTS_DOCKET_KEYWORDS_HL_FIELDS - ) + highlighting_fields = SEARCH_RECAP_HL_FIELDS case SEARCH_TYPES.OPINION: highlighting_fields = SEARCH_OPINION_HL_FIELDS @@ -2112,7 +2102,6 @@ def build_join_es_filters(cd: CleanData) -> List: *build_text_filter("referredTo", cd.get("referred_to", "")), *build_text_filter("party", cd.get("party_name", "")), *build_text_filter("attorney", cd.get("atty_name", "")), - *build_text_filter("firm", cd.get("firm_name", "")), *build_daterange_query( "dateFiled", cd.get("filed_before", ""), @@ -2393,7 +2382,7 @@ def build_full_join_es_queries( query for query in parent_filters if isinstance(query, QueryString) - and query.fields[0] in ["party", "attorney", "firm"] + and query.fields[0] in ["party", "attorney"] ] has_parent_parties_filter = build_has_parent_parties_query( parties_filters @@ -3039,7 +3028,7 @@ def do_es_alert_estimation_query( return estimation_query.count() -def do_es_sweep_nested_query( +def do_es_sweep_alert_query( search_query: Search, cd: CleanData, ) -> tuple[list[Hit] | None, int | None]: @@ -3058,21 +3047,16 @@ def do_es_sweep_nested_query( else: return None, None - hits = None - try: - s, _ = build_es_base_query(search_query, cd, True, alerts=True) - except ( - UnbalancedParenthesesQuery, - UnbalancedQuotesQuery, - BadProximityQuery, - ) as e: - raise ElasticBadRequestError(detail=e.message) + total_hits = None + + s, _ = build_es_base_query(search_query, cd, True, alerts=True) + main_query = add_es_highlighting(s, cd, alerts=True) main_query = main_query.sort(build_sort_results(cd)) main_query = main_query.extra(from_=0, size=30) results = main_query.execute() if results: - hits = results.hits.total.value + total_hits = results.hits.total.value limit_inner_hits({}, results, cd["type"]) set_results_highlights(results, cd["type"]) @@ -3081,27 +3065,7 @@ def do_es_sweep_nested_query( child_result_objects = [] if hasattr(result, "child_docs"): for child_doc in result.child_docs: - child_result_objects.append( - defaultdict(lambda: None, child_doc["_source"].to_dict()) - ) + child_result_objects.append(child_doc.to_dict()) result["child_docs"] = child_result_objects - return results, hits - - -def docket_field_matched(hit: 
Hit) -> bool: - """Determine whether HL matched a Docket field. - - :param hit: The ES hit. - :return: True if the hit matched a Docket field. Otherwise, False. - """ - - plain_hl = set(SEARCH_ALERTS_DOCKET_KEYWORDS_HL_FIELDS) - vector_hl = set(SEARCH_ALERTS_DOCKET_HL_FIELDS.keys()) - docket_hl = set() - if hasattr(hit.meta, "highlight"): - highlights = hit.meta.highlight.to_dict() - docket_hl = set([hl for hl in highlights.keys()]) - if docket_hl.issubset(plain_hl.union(vector_hl)): - return True - return False + return results, total_hits diff --git a/cl/lib/test_helpers.py b/cl/lib/test_helpers.py index d7e1c49ebc..69976430f1 100644 --- a/cl/lib/test_helpers.py +++ b/cl/lib/test_helpers.py @@ -278,19 +278,15 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime: ), "court_id": lambda x: x["result"].docket_entry.docket.court.pk, "dateArgued": lambda x: ( - x["dateArgued"] - if x.get("dateArgued") - else ( - ( - x["result"].docket_entry.docket.date_argued.isoformat() - if x.get("V4") - else midnight_pt_test( - x["result"].docket_entry.docket.date_argued - ).isoformat() - ) - if x["result"].docket_entry.docket.date_argued - else None + ( + x["result"].docket_entry.docket.date_argued.isoformat() + if x.get("V4") + else midnight_pt_test( + x["result"].docket_entry.docket.date_argued + ).isoformat() ) + if x["result"].docket_entry.docket.date_argued + else None ), "dateFiled": lambda x: ( ( @@ -319,11 +315,7 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime: if x.get("docketNumber") else x["result"].docket_entry.docket.docket_number ), - "docket_id": lambda x: ( - x["docket_id"] - if x.get("docket_id") - else x["result"].docket_entry.docket_id - ), + "docket_id": lambda x: x["result"].docket_entry.docket_id, "jurisdictionType": lambda x: x[ "result" ].docket_entry.docket.jurisdiction_type, @@ -356,14 +348,10 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime: recap_type_v4_api_keys = docket_api_common_keys.copy() recap_type_v4_api_keys.update( { - "attorney": lambda x: ( - x["attorney"] - if x.get("attorney") - else list( - DocketDocument().prepare_parties( - x["result"].docket_entry.docket - )["attorney"] - ) + "attorney": lambda x: list( + DocketDocument().prepare_parties(x["result"].docket_entry.docket)[ + "attorney" + ] ), "attorney_id": lambda x: list( DocketDocument().prepare_parties(x["result"].docket_entry.docket)[ @@ -383,14 +371,10 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime: "docket_absolute_url": lambda x: x[ "result" ].docket_entry.docket.get_absolute_url(), - "firm": lambda x: ( - x["firm"] - if x.get("firm") - else list( - DocketDocument().prepare_parties( - x["result"].docket_entry.docket - )["firm"] - ) + "firm": lambda x: list( + DocketDocument().prepare_parties(x["result"].docket_entry.docket)[ + "firm" + ] ), "firm_id": lambda x: list( DocketDocument().prepare_parties(x["result"].docket_entry.docket)[ @@ -402,14 +386,10 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime: if x["result"].docket_entry.docket.pacer_case_id else "" ), - "party": lambda x: ( - x["party"] - if x.get("party") - else list( - DocketDocument().prepare_parties( - x["result"].docket_entry.docket - )["party"] - ) + "party": lambda x: list( + DocketDocument().prepare_parties(x["result"].docket_entry.docket)[ + "party" + ] ), "party_id": lambda x: list( DocketDocument().prepare_parties(x["result"].docket_entry.docket)[ diff --git a/cl/search/api_serializers.py b/cl/search/api_serializers.py index f5abc46b93..1f9cbb7d75 100644 --- 
a/cl/search/api_serializers.py +++ b/cl/search/api_serializers.py @@ -528,7 +528,6 @@ class BaseDocketESResultSerializer(DocumentSerializer): attorney = NoneToListField(read_only=True, required=False) firm_id = NoneToListField(read_only=True, required=False) firm = NoneToListField(read_only=True, required=False) - docket_id = HighlightedField(read_only=True) class Meta: document = DocketDocument diff --git a/cl/search/constants.py b/cl/search/constants.py index 0457e53799..2d0d011962 100644 --- a/cl/search/constants.py +++ b/cl/search/constants.py @@ -155,35 +155,6 @@ "referredTo": 0, "suitNature": 0, } - -SEARCH_ALERTS_DOCKET_HL_FIELDS = { - "assignedTo": 0, - "caseName": 0, - "cause": 0, - "court_citation_string": 0, - "docketNumber": 0, - "juryDemand": 0, - "referredTo": 0, - "suitNature": 0, - "party": 0, - "attorney": 0, - "firm": 0, -} - -SEARCH_ALERTS_DOCKET_KEYWORDS_HL_FIELDS = { - "docket_id", - "court_id", - "firm_id", - "assigned_to_id", - "referred_to_id", - "dateFiled", - "dateArgued", - "dateTerminated", - "jurisdictionType", - "chapter", - "trustee_str", -} - SEARCH_OPINION_HL_FIELDS = { "caseName": 0, "citation": 0, @@ -321,3 +292,27 @@ Opinion.ON_MOTION_TO_STRIKE: "on-motion-to-strike", Opinion.TRIAL_COURT: "trial-court-document", } + +recap_document_indexed_fields = [ + "id", + "docket_entry_id", + "description", + "entry_number", + "entry_date_filed", + "short_description", + "document_type", + "document_number", + "pacer_doc_id", + "plain_text", + "attachment_number", + "is_available", + "page_count", + "cites", +] + +recap_document_filters = [ + "available_only", + "description", + "document_number", + "attachment_number", +] diff --git a/cl/search/documents.py b/cl/search/documents.py index 957f03034e..d64f4eb724 100644 --- a/cl/search/documents.py +++ b/cl/search/documents.py @@ -790,7 +790,7 @@ class DocketBaseDocument(Document): timestamp = fields.DateField() # Docket Fields - docket_id = fields.KeywordField(attr="pk") + docket_id = fields.IntegerField(attr="pk") caseName = fields.TextField( analyzer="text_en_splitting_cl", term_vector="with_positions_offsets", @@ -1831,51 +1831,6 @@ def prepare_cluster_child(self, instance): @recap_sweep_index.document class DocketSweepDocument(DocketDocument): - party = fields.ListField( - fields.TextField( - analyzer="text_en_splitting_cl", - term_vector="with_positions_offsets", - fields={ - "exact": fields.TextField( - analyzer="english_exact", - term_vector="with_positions_offsets", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - multi=True, - ) - ) - attorney = fields.ListField( - fields.TextField( - analyzer="text_en_splitting_cl", - term_vector="with_positions_offsets", - fields={ - "exact": fields.TextField( - analyzer="english_exact", - term_vector="with_positions_offsets", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - multi=True, - ) - ) - firm = fields.ListField( - fields.TextField( - analyzer="text_en_splitting_cl", - term_vector="with_positions_offsets", - fields={ - "exact": fields.TextField( - analyzer="english_exact", - term_vector="with_positions_offsets", - search_analyzer="search_analyzer_exact", - ), - }, - search_analyzer="search_analyzer", - multi=True, - ) - ) class Django: model = Docket @@ -1884,6 +1839,7 @@ class Django: @recap_sweep_index.document class ESRECAPSweepDocument(ESRECAPDocument): + class Django: model = RECAPDocument ignore_signals = True diff --git a/cl/search/tests/tests_es_recap.py 
b/cl/search/tests/tests_es_recap.py index 2cc46a3842..389193b204 100644 --- a/cl/search/tests/tests_es_recap.py +++ b/cl/search/tests/tests_es_recap.py @@ -19,8 +19,6 @@ from cl.lib.elasticsearch_utils import ( build_es_main_query, - do_es_sweep_nested_query, - docket_field_matched, fetch_es_results, merge_unavailable_fields_on_parent_document, set_results_highlights, @@ -50,12 +48,7 @@ RECAPESResultSerializer, ) from cl.search.api_views import SearchV4ViewSet -from cl.search.documents import ( - ES_CHILD_ID, - DocketDocument, - DocketSweepDocument, - ESRECAPDocument, -) +from cl.search.documents import ES_CHILD_ID, DocketDocument, ESRECAPDocument from cl.search.factories import ( BankruptcyInformationFactory, CourtFactory, @@ -6679,163 +6672,3 @@ def test_recap_history_table_indexing(self) -> None: ) if keys: self.r.delete(*keys) - - -class RECAPSearchSweepIndexTest( - RECAPSearchAPICommonTests, ESIndexTestCase, TestCase, V4SearchAPIAssertions -): - """ - RECAP Sweep Index Tests - """ - - version_api = "v4" - skip_common_tests = False - - @classmethod - def setUpTestData(cls): - cls.rebuild_index("people_db.Person") - cls.rebuild_index("search.Docket") - cls.mock_date = now().replace(day=15, hour=0) - with time_machine.travel(cls.mock_date, tick=False): - super().setUpTestData() - call_command( - "cl_index_parent_and_child_docs", - search_type=SEARCH_TYPES.RECAP, - queue="celery", - pk_offset=0, - testing_mode=True, - sweep_index=True, - ) - - async def _test_api_results_count( - self, params, expected_count, field_name - ): - - search_query = DocketSweepDocument.search() - results, total_hits = await sync_to_async(do_es_sweep_nested_query)( - search_query, - params, - ) - results = RECAPESResultSerializer(results, many=True).data - got = len(results) - self.assertEqual( - got, - expected_count, - msg="Did not get the right number of search results in API with %s " - "filter applied.\n" - "Expected: %s\n" - " Got: %s\n\n" - "Params were: %s" % (field_name, expected_count, got, params), - ) - return results - - async def test_cross_object_string_query_and_hl(self) -> None: - """Confirm a cross-object string query return the right results and - highlighting is properly applied. - """ - - # Docket-only query HL - search_params = { - "type": SEARCH_TYPES.RECAP, - "q": f"id:{self.rd_api.pk} cause:(401 Civil) " - f"court_citation_string:Appeals juryDemand:Plaintiff " - f"docket_id:{self.rd_api.docket_entry.docket.pk} " - f"dateArgued:[2022-05-19T00:00:00Z TO 2022-05-21T00:00:00Z]", - "assigned_to": "George", - "referred_to": "George", - "case_name": "America vs API", - "docket_number": "1:24-bk-0000", - "nature_of_suit": "569", - "party_name": "Defendant John Doe", - "atty_name": "John Doe", - "firm_name": "Associates America", - } - - # RECAP Search type HL disabled. - r = await self._test_api_results_count(search_params, 1, "API fields") - keys_count = len(r[0]) - self.assertEqual(keys_count, len(recap_type_v4_api_keys)) - rd_keys_count = len(r[0]["recap_documents"][0]) - self.assertEqual(rd_keys_count, len(recap_document_v4_api_keys)) - - content_to_compare = { - "result": self.rd_api, - "V4": True, - "assignedTo": "George Doe II", - "caseName": "America vs API Lorem", - "cause": "401 Civil", - "court_citation_string": "Appeals. 
CA9.", - "docketNumber": "1:24-bk-0000", - "juryDemand": "Plaintiff", - "referredTo": "George Doe II", - "suitNature": "569", - "party": [ - "Defendant John Doe" - ], - "firm": ["Associates America"], - "attorney": ["John Doe"], - "docket_id": f"{self.rd_api.docket_entry.docket.pk}", - "dateArgued": f"2022-05-19", - } - await self._test_api_fields_content( - r, - content_to_compare, - recap_type_v4_api_keys, - recap_document_v4_api_keys, - v4_recap_meta_keys, - ) - - search_params = { - "type": SEARCH_TYPES.RECAP, - "order_by": "dateFiled desc", - } - # Match all query RECAP Search type HL enabled, get snippet from ES. - with override_settings(NO_MATCH_HL_SIZE=50): - r = await self._test_api_results_count( - search_params, 5, "API fields" - ) - content_to_compare = { - "result": self.rd_2, - "snippet": "Mauris iaculis, leo sit amet hendrerit vehicula, Maecenas", - "V4": True, - } - await self._test_api_fields_content( - r, - content_to_compare, - recap_type_v4_api_keys, - recap_document_v4_api_keys, - v4_recap_meta_keys, - ) - - async def test_query_matched_docket_field(self) -> None: - - # Docket-only query HL - search_params = { - "type": SEARCH_TYPES.RECAP, - "q": f"id:{self.rd_api.pk} cause:(401 Civil) " - f"court_citation_string:Appeals juryDemand:Plaintiff " - f"docket_id:{self.rd_api.docket_entry.docket.pk} ", - } - - search_query = DocketSweepDocument.search() - results, total_hits = await sync_to_async(do_es_sweep_nested_query)( - search_query, - search_params, - ) - d_field_matched = docket_field_matched(results[0]) - self.assertEqual(d_field_matched, True) - - # RECAPDocument-only query HL - search_params = { - "type": SEARCH_TYPES.RECAP, - "q": f"id:{self.rd_api.pk} short_description:(Order Letter) plain_text:(shown in the API)", - "description": "MOTION for Leave", - "document_number": "2", - } - search_query = DocketSweepDocument.search() - results, total_hits = await sync_to_async(do_es_sweep_nested_query)( - search_query, - search_params, - ) - d_field_matched = docket_field_matched(results[0]) - self.assertEqual(d_field_matched, False) From 9b4e1c1aa58c92f7e195b64fc866295288874311 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Jun 2024 02:48:20 +0000 Subject: [PATCH 04/33] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../commands/cl_send_recap_alerts.py | 39 ++++++++++++------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index cefefb1cfb..3399a6801f 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -1,5 +1,5 @@ -import traceback import datetime +import traceback from asgiref.sync import async_to_sync from django.contrib.auth.models import User @@ -7,33 +7,36 @@ from django.utils.timezone import now from elasticsearch.exceptions import RequestError, TransportError +from cl.alerts.models import Alert +from cl.alerts.tasks import send_search_alert_emails +from cl.alerts.utils import query_includes_rd_field, recap_document_hl_matched from cl.lib.command_utils import VerboseCommand, logger from cl.lib.elasticsearch_utils import do_es_sweep_alert_query from cl.search.documents import DocketSweepDocument -from cl.search.models import SEARCH_TYPES -from cl.stats.utils import tally_stat -from cl.alerts.tasks import send_search_alert_emails -from 
cl.alerts.models import Alert from cl.search.exception import ( BadProximityQuery, UnbalancedParenthesesQuery, UnbalancedQuotesQuery, ) -from cl.alerts.utils import recap_document_hl_matched, query_includes_rd_field +from cl.search.models import SEARCH_TYPES +from cl.stats.utils import tally_stat def index_daily_recap_documents(): # TODO implement pass + def has_rd_hit_been_triggered(): # TODO implement return False + def has_docket_hit_been_triggered(): # TODO implement return True + def query_and_send_alerts(rate): alert_users = User.objects.filter(alerts__rate=rate).distinct() alerts_sent_count = 0 @@ -54,13 +57,16 @@ def query_and_send_alerts(rate): search_query, search_params, ) - except (UnbalancedParenthesesQuery, - UnbalancedQuotesQuery, - BadProximityQuery,TransportError, ConnectionError, RequestError): + except ( + UnbalancedParenthesesQuery, + UnbalancedQuotesQuery, + BadProximityQuery, + TransportError, + ConnectionError, + RequestError, + ): traceback.print_exc() - logger.info( - f"Search for this alert failed: {alert.query}\n" - ) + logger.info(f"Search for this alert failed: {alert.query}\n") continue alerts_to_update.append(alert.pk) @@ -69,9 +75,12 @@ def query_and_send_alerts(rate): results_to_send = [] for hit in results: if not includes_rd_fields: - rds_to_send = [rd_hit for rd_hit in hit["child_docs"] - if not recap_document_hl_matched( - rd_hit) and not has_rd_hit_been_triggered()] + rds_to_send = [ + rd_hit + for rd_hit in hit["child_docs"] + if not recap_document_hl_matched(rd_hit) + and not has_rd_hit_been_triggered() + ] if rds_to_send: hit["child_docs"] = rds_to_send results_to_send.append(hit) From 8b537f0b8fd4f272d6ce6439672e7b6c29bf5be8 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Wed, 26 Jun 2024 20:34:19 -0600 Subject: [PATCH 05/33] fix(alerts): Implemented filtering of RECAP alerts hits for the sweep index command. 
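A rough sketch of the per-hit filtering flow this command now applies
(helper names are the ones introduced in this patch; the wrapper name
`filter_hit_for_alert`, the Redis connection `r`, and the parsed alert
are hypothetical glue added here for illustration only):

    # Sketch only, not the final implementation: decide what, if anything,
    # to send for one ES hit of a RECAP alert.
    def filter_hit_for_alert(r, alert, hit, includes_rd_fields):
        if not includes_rd_fields:
            # Possible Docket-only alert: keep RDs whose HLs matched RD
            # fields and that haven't triggered this alert before.
            rds = filter_rd_alert_hits(
                r, alert.pk, hit["child_docs"], check_rd_hl=True
            )
            if rds:
                # Cross-object hit.
                hit["child_docs"] = rds
                return hit
            if should_docket_hit_be_included(r, alert.pk, hit.docket_id):
                # Docket-only hit: send it without child documents.
                hit["child_docs"] = []
                add_document_hit_to_alert_set(r, alert.pk, "d", hit.docket_id)
                return hit
            return None
        # RECAP-only or cross-object alert: keep only RDs that haven't
        # triggered this alert before.
        rds = filter_rd_alert_hits(r, alert.pk, hit["child_docs"])
        if rds:
            hit["child_docs"] = rds
            return hit
        return None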
--- .../commands/cl_send_recap_alerts.py | 116 ++++++-- cl/alerts/tests/tests_recap_alerts.py | 263 ++++++++++++++++-- cl/alerts/utils.py | 42 +++ 3 files changed, 377 insertions(+), 44 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index 3399a6801f..537626bc17 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -6,19 +6,26 @@ from django.http import QueryDict from django.utils.timezone import now from elasticsearch.exceptions import RequestError, TransportError +from redis import Redis from cl.alerts.models import Alert from cl.alerts.tasks import send_search_alert_emails -from cl.alerts.utils import query_includes_rd_field, recap_document_hl_matched +from cl.alerts.utils import ( + add_document_hit_to_alert_set, + has_document_alert_hit_been_triggered, + query_includes_rd_field, + recap_document_hl_matched, +) from cl.lib.command_utils import VerboseCommand, logger from cl.lib.elasticsearch_utils import do_es_sweep_alert_query +from cl.lib.redis_utils import get_redis_interface from cl.search.documents import DocketSweepDocument from cl.search.exception import ( BadProximityQuery, UnbalancedParenthesesQuery, UnbalancedQuotesQuery, ) -from cl.search.models import SEARCH_TYPES +from cl.search.models import SEARCH_TYPES, Docket from cl.stats.utils import tally_stat @@ -27,22 +34,65 @@ def index_daily_recap_documents(): pass -def has_rd_hit_been_triggered(): - # TODO implement +def should_docket_hit_be_included( + r: Redis, alert_id: int, docket_id: int +) -> bool: + """Determine if a Docket alert should be triggered based on its + date_modified and if the docket has triggered the alert previously. + + :param r: The Redis interface. + :param alert_id: The ID of the alert. + :param docket_id: The ID of the docket. + :return: True if the Docket alert should be triggered, False otherwise. + """ + docket = Docket.objects.filter(id=docket_id).only("date_modified").first() + if not docket: + return False + date_modified = docket.date_modified.date() + if not has_document_alert_hit_been_triggered(r, alert_id, "d", docket_id): + if date_modified == now().date(): + return True return False -def has_docket_hit_been_triggered(): - # TODO implement - return True +def filter_rd_alert_hits(r, alert_id, rd_hits, check_rd_hl=False): + """Filter RECAP document hits based on specified conditions. + + :param r: The Redis interface. + :param alert_id: The ID of the alert. + :param rd_hits: A list of RECAP document hits to be processed. + :param check_rd_hl: A boolean indicating whether to check if the RECAP + document hit matched RD HLs. + :return: A list of RECAP document hits that meet all specified conditions. 
+ """ + + rds_to_send = [] + for rd_hit in rd_hits: + conditions = [ + not has_document_alert_hit_been_triggered( + r, alert_id, "r", rd_hit["_source"]["id"] + ) + ] + if check_rd_hl: + conditions.append(recap_document_hl_matched(rd_hit)) + if all(conditions): + rds_to_send.append(rd_hit) + add_document_hit_to_alert_set( + r, alert_id, "r", rd_hit["_source"]["id"] + ) + return rds_to_send def query_and_send_alerts(rate): + r = get_redis_interface("CACHE") alert_users = User.objects.filter(alerts__rate=rate).distinct() alerts_sent_count = 0 now_time = datetime.datetime.now() for user in alert_users: - alerts = user.alerts.filter(rate=rate) + if rate == Alert.REAL_TIME: + if not user.profile.is_member: + continue + alerts = user.alerts.filter(rate=rate, alert_type=SEARCH_TYPES.RECAP) logger.info(f"Running alerts for user '{user}': {alerts}") hits = [] @@ -50,7 +100,6 @@ def query_and_send_alerts(rate): for alert in alerts: search_params = QueryDict(alert.query.encode(), mutable=True) includes_rd_fields = query_includes_rd_field(search_params) - try: search_query = DocketSweepDocument.search() results, total_hits = do_es_sweep_alert_query( @@ -71,26 +120,48 @@ def query_and_send_alerts(rate): alerts_to_update.append(alert.pk) if len(results) > 0: - search_type = search_params.get("type", SEARCH_TYPES.OPINION) + search_type = search_params.get("type", SEARCH_TYPES.RECAP) results_to_send = [] for hit in results: if not includes_rd_fields: - rds_to_send = [ - rd_hit - for rd_hit in hit["child_docs"] - if not recap_document_hl_matched(rd_hit) - and not has_rd_hit_been_triggered() - ] + # Possible Docket-only query + rds_to_send = filter_rd_alert_hits( + r, alert.pk, hit["child_docs"], check_rd_hl=True + ) if rds_to_send: + # Cross-object query + hit["child_docs"] = rds_to_send + results_to_send.append(hit) + elif should_docket_hit_be_included( + r, alert.pk, hit.docket_id + ): + hit["child_docs"] = [] + results_to_send.append(hit) + add_document_hit_to_alert_set( + r, alert.pk, "d", hit.docket_id + ) + else: + # RECAP-only alerts or cross-object alerts + rds_to_send = filter_rd_alert_hits( + r, alert.pk, hit["child_docs"] + ) + if rds_to_send: + # Cross-object query hit["child_docs"] = rds_to_send results_to_send.append(hit) - hits.append( - [alert, search_type, results_to_send, len(results_to_send)] - ) - alert.query_run = search_params.urlencode() - alert.date_last_hit = now() - alert.save() + if results_to_send: + hits.append( + [ + alert, + search_type, + results_to_send, + len(results_to_send), + ] + ) + alert.query_run = search_params.urlencode() + alert.date_last_hit = now() + alert.save() if hits: send_search_alert_emails.delay([(user.pk, hits)]) @@ -114,7 +185,6 @@ class Command(VerboseCommand): def handle(self, *args, **options): super().handle(*args, **options) - index_daily_recap_documents() query_and_send_alerts(Alert.REAL_TIME) query_and_send_alerts(Alert.DAILY) diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index 4b00408d2c..d3b345f36e 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -1,3 +1,4 @@ +import datetime from unittest import mock import time_machine @@ -15,6 +16,12 @@ from cl.lib.elasticsearch_utils import do_es_sweep_alert_query from cl.lib.test_helpers import RECAPSearchTestCase from cl.search.documents import DocketSweepDocument +from cl.search.factories import ( + DocketEntryWithParentsFactory, + DocketFactory, + RECAPDocumentFactory, +) +from cl.search.models import Docket 
 from cl.tests.cases import ESIndexTestCase, TestCase
 from cl.tests.utils import MockResponse
 from cl.users.factories import UserProfileWithParentsFactory
@@ -31,7 +38,7 @@ class RECAPAlertsSweepIndexTest(
     def setUpTestData(cls):
         cls.rebuild_index("people_db.Person")
         cls.rebuild_index("search.Docket")
-        cls.mock_date = now().replace(day=15, hour=0)
+        cls.mock_date = now()
         with time_machine.travel(cls.mock_date, tick=False):
             super().setUpTestData()
             call_command(
@@ -47,30 +54,26 @@ def setUpTestData(cls):
         NeonMembership.objects.create(
             level=NeonMembership.LEGACY, user=cls.user_profile.user
         )
+        cls.user_profile_2 = UserProfileWithParentsFactory()
+        NeonMembership.objects.create(
+            level=NeonMembership.LEGACY, user=cls.user_profile_2.user
+        )
+        cls.user_profile_no_member = UserProfileWithParentsFactory()
         cls.webhook_enabled = WebhookFactory(
             user=cls.user_profile.user,
             event_type=WebhookEventType.SEARCH_ALERT,
             url="https://example.com/",
             enabled=True,
         )
-        cls.search_alert = AlertFactory(
-            user=cls.user_profile.user,
-            rate=Alert.REAL_TIME,
-            name="Test Alert Docket Only",
-            query='q="401 Civil"&type=r',
-        )
-        cls.search_alert_2 = AlertFactory(
-            user=cls.user_profile.user,
-            rate=Alert.REAL_TIME,
-            name="Test Alert RECAP Only",
-            query='q="Mauris iaculis, leo sit amet hendrerit vehicula"&type=r',
-        )
-        cls.search_alert_3 = AlertFactory(
-            user=cls.user_profile.user,
-            rate=Alert.DAILY,
-            name="Test Cross object",
-            query="q=SUBPOENAS SERVED OFF Mauris iaculis&type=r",
-        )
+
+    @staticmethod
+    def get_html_content_from_email(email_content):
+        html_content = None
+        for content, content_type in email_content.alternatives:
+            if content_type == "text/html":
+                html_content = content
+                break
+        return html_content

     async def test_recap_document_hl_matched(self) -> None:
         """Test recap_document_hl_matched method that determines whether a hit
@@ -172,11 +175,83 @@ async def test_query_includes_rd_field(self) -> None:
         }
         self.assertEqual(query_includes_rd_field(search_params), True)

-    def test_filter_out_alerts_to_send(self) -> None:
-        """Test RECAP alerts hit can be properly filtered out according to
+    def test_filter_recap_alerts_to_send(self) -> None:
+        """Test filtering RECAP alerts that meet the conditions to be sent:
+        - RECAP type alert.
+        - RT or DLY rate.
+        - For RT rate, the user must have an active membership.
+        """
+
+        rt_recap_alert = AlertFactory(
+            user=self.user_profile.user,
+            rate=Alert.REAL_TIME,
+            name="Test RT RECAP Alert",
+            query='q="401 Civil"&type=r',
+        )
+        dly_recap_alert = AlertFactory(
+            user=self.user_profile.user,
+            rate=Alert.DAILY,
+            name="Test DLY RECAP Alert",
+            query='q="401 Civil"&type=r',
+        )
+        AlertFactory(
+            user=self.user_profile_2.user,
+            rate=Alert.REAL_TIME,
+            name="Test RT Opinion Alert",
+            query='q="401 Civil"',
+        )
+        AlertFactory(
+            user=self.user_profile_no_member.user,
+            rate=Alert.REAL_TIME,
+            name="Test RT RECAP Alert no Member",
+            query='q="401 Civil"&type=r',
+        )
+
+        with mock.patch(
+            "cl.api.webhooks.requests.post",
+            side_effect=lambda *args, **kwargs: MockResponse(
+                200, mock_raw=True
+            ),
+        ), time_machine.travel(self.mock_date, tick=False):
+            call_command("cl_send_recap_alerts")
+
+        # Only the RECAP RT alert for a member and the RECAP DLY alert are sent.
+        self.assertEqual(
+            len(mail.outbox), 2, msg="Outgoing emails don't match."
+ ) + html_content = self.get_html_content_from_email(mail.outbox[0]) + self.assertIn(rt_recap_alert.name, html_content) + + html_content = self.get_html_content_from_email(mail.outbox[1]) + self.assertIn(dly_recap_alert.name, html_content) + + def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: + """Test RECAP alerts can be properly filtered out according to their query and hits matched conditions. + + - Docket-only Alerts should be triggered only if the Docket was + modified on the day. This prevents sending Alerts due to related + RDs added on the same day which can match the query due to parent + fields indexed into the RDs. + - The Docket or RD shouldn’t have triggered the alert previously. + - RECAP-only Alerts should only include RDs that have not triggered the + same alert previously. If there are no hits after filtering RDs, + don’t send the alert. + - Cross-object queries should only include RDs that have not triggered + the same alert previously. If there are no hits after filtering RDs, + don’t send the alert. + + Assert the content structure accordingly. """ + # This docket-only alert, matches a Docket added today. + + docket_only_alert = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Docket Only", + query='q="401 Civil"&type=r', + ) with mock.patch( "cl.api.webhooks.requests.post", side_effect=lambda *args, **kwargs: MockResponse( @@ -185,6 +260,152 @@ def test_filter_out_alerts_to_send(self) -> None: ), time_machine.travel(self.mock_date, tick=False): call_command("cl_send_recap_alerts") + self.assertEqual( + len(mail.outbox), 1, msg="Outgoing emails don't match." + ) + html_content = self.get_html_content_from_email(mail.outbox[0]) + self.assertIn(docket_only_alert.name, html_content) + + # This test shouldn't match the Docket-only alert when the RD is added + # today since its parent Docket was not modified today. + AlertFactory( + user=self.user_profile_2.user, + rate=Alert.REAL_TIME, + name="Test Alert Docket Only Not Triggered", + query='q="405 Civil"&type=r', + ) + one_day_before = now() - datetime.timedelta(days=1) + mock_date = one_day_before.replace(hour=5) + with time_machine.travel(mock_date, tick=False): + docket = DocketFactory( + court=self.court, + case_name="SUBPOENAS SERVED CASE", + case_name_full="Jackson & Sons Holdings vs. Bank", + docket_number="1:21-bk-1234", + nature_of_suit="440", + source=Docket.RECAP, + cause="405 Civil", + jurisdiction_type="'U.S. Government Defendant", + jury_demand="1,000,000", + ) + + mock_date = now().replace(hour=5) + with time_machine.travel(mock_date, tick=False): + de = DocketEntryWithParentsFactory( + docket=docket, + entry_number=1, + date_filed=datetime.date(2024, 8, 19), + description="MOTION for Leave to File Amicus Curiae Lorem Served", + ) + rd = RECAPDocumentFactory( + docket_entry=de, + description="Motion to File", + document_number="1", + is_available=True, + page_count=5, + pacer_doc_id="018036652436", + plain_text="plain text for 018036652436", + ) + + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + sweep_index=True, + ) + + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts") + # No new alert should be triggered. + self.assertEqual( + len(mail.outbox), 1, msg="Outgoing emails don't match." 
+        )
+
+        recap_only_alert = AlertFactory(
+            user=self.user_profile.user,
+            rate=Alert.REAL_TIME,
+            name="Test Alert RECAP Only",
+            query='q="plain text for 018036652436"&type=r',
+        )
+        with mock.patch(
+            "cl.api.webhooks.requests.post",
+            side_effect=lambda *args, **kwargs: MockResponse(
+                200, mock_raw=True
+            ),
+        ), time_machine.travel(self.mock_date, tick=False):
+            call_command("cl_send_recap_alerts")
+        # One new alert should be triggered.
         self.assertEqual(
             len(mail.outbox), 2, msg="Outgoing emails don't match."
         )
+
+        # Trigger the alert again.
+        with mock.patch(
+            "cl.api.webhooks.requests.post",
+            side_effect=lambda *args, **kwargs: MockResponse(
+                200, mock_raw=True
+            ),
+        ), time_machine.travel(self.mock_date, tick=False):
+            call_command("cl_send_recap_alerts")
+        # No new alert should be triggered.
+        self.assertEqual(
+            len(mail.outbox), 2, msg="Outgoing emails don't match."
+        )
+
+        # Create a new RD for the same DocketEntry.
+        rd = RECAPDocumentFactory(
+            docket_entry=de,
+            description="Motion to File 2",
+            document_number="2",
+            is_available=True,
+            page_count=3,
+            pacer_doc_id="018036652436",
+            plain_text="plain text for 018036652436",
+        )
+        call_command(
+            "cl_index_parent_and_child_docs",
+            search_type=SEARCH_TYPES.RECAP,
+            queue="celery",
+            pk_offset=0,
+            testing_mode=True,
+            sweep_index=True,
+        )
+
+        with mock.patch(
+            "cl.api.webhooks.requests.post",
+            side_effect=lambda *args, **kwargs: MockResponse(
+                200, mock_raw=True
+            ),
+        ), time_machine.travel(self.mock_date, tick=False):
+            call_command("cl_send_recap_alerts")
+
+        # A new alert should be triggered containing only the new RD created.
+        self.assertEqual(
+            len(mail.outbox), 3, msg="Outgoing emails don't match."
+        )
+
+        recap_only_alert_2 = AlertFactory(
+            user=self.user_profile.user,
+            rate=Alert.REAL_TIME,
+            name="Test Alert RECAP Only Docket Entry",
+            query=f"q=docket_entry_id:{de.pk}&type=r",
+        )
+        with mock.patch(
+            "cl.api.webhooks.requests.post",
+            side_effect=lambda *args, **kwargs: MockResponse(
+                200, mock_raw=True
+            ),
+        ), time_machine.travel(self.mock_date, tick=False):
+            call_command("cl_send_recap_alerts")
+
+        # A new alert should be triggered containing two RDs.
+        self.assertEqual(
+            len(mail.outbox), 4, msg="Outgoing emails don't match."
+        )
diff --git a/cl/alerts/utils.py b/cl/alerts/utils.py
index 408ae13da0..677898a688 100644
--- a/cl/alerts/utils.py
+++ b/cl/alerts/utils.py
@@ -5,6 +5,7 @@
 from django.http import QueryDict
 from elasticsearch_dsl import Q, Search
 from elasticsearch_dsl.response import Hit, Response
+from redis import Redis

 from cl.alerts.models import (
     SCHEDULED_ALERT_HIT_STATUS,
@@ -187,3 +188,44 @@ def query_includes_rd_field(query_params: CleanData) -> bool:
             return True

     return False
+
+
+def make_alert_set_key(alert_id: int, document_type: str) -> str:
+    """Generate a Redis key for storing alert hits.
+
+    :param alert_id: The ID of the alert.
+    :param document_type: The type of document associated with the alert.
+    :return: A Redis key string in the format "alert_hits:{alert_id}.{document_type}".
+    """
+    return f"alert_hits:{alert_id}.{document_type}"
+
+
+def add_document_hit_to_alert_set(
+    r: Redis, alert_id: int, document_type: str, document_id: int
+) -> None:
+    """Add a document ID to the Redis SET associated with an alert ID.
+
+    :param r: Redis client instance.
+    :param alert_id: The alert identifier.
+    :param document_type: The type of document associated with the alert.
+    :param document_id: The document identifier to add.
+ :return: None + """ + alert_key = make_alert_set_key(alert_id, document_type) + r.sadd(alert_key, document_id) + + +def has_document_alert_hit_been_triggered( + r: Redis, alert_id: int, document_type: str, document_id: int +) -> bool: + """Check if a document ID is a member of the Redis SET associated with an + alert ID. + + :param r: Redis client instance. + :param alert_id: The alert identifier. + :param document_type: The type of document associated with the alert. + :param document_id: The docket identifier to check. + :return: True if the docket ID is a member of the set, False otherwise. + """ + alert_key = make_alert_set_key(alert_id, document_type) + return r.sismember(alert_key, document_id) From c1232ec8ae11f86a6c8209061d9fe80d5c9f62bb Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Thu, 27 Jun 2024 20:22:03 -0600 Subject: [PATCH 06/33] fix(alerts): Updated ES alert email templates to support RECAP Alerts. - Added tests to assert nested child documents in case alerts. --- .../commands/cl_send_recap_alerts.py | 8 +- cl/alerts/templates/alert_email_es.html | 71 +++- cl/alerts/templates/alert_email_es.txt | 13 +- cl/alerts/tests/tests_recap_alerts.py | 305 +++++++++++++++++- cl/alerts/utils.py | 2 +- cl/custom_filters/templatetags/extras.py | 20 +- 6 files changed, 379 insertions(+), 40 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index 537626bc17..fd8d3a13c9 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -124,7 +124,9 @@ def query_and_send_alerts(rate): results_to_send = [] for hit in results: if not includes_rd_fields: - # Possible Docket-only query + # Possible Docket-only alert + # TODO important to keep the original ES child structure to preserve HLs. + # Maybe we can merge HL after filtering them? rds_to_send = filter_rd_alert_hits( r, alert.pk, hit["child_docs"], check_rd_hl=True ) @@ -135,6 +137,7 @@ def query_and_send_alerts(rate): elif should_docket_hit_be_included( r, alert.pk, hit.docket_id ): + # Docket-only alert hit["child_docs"] = [] results_to_send.append(hit) add_document_hit_to_alert_set( @@ -146,7 +149,7 @@ def query_and_send_alerts(rate): r, alert.pk, hit["child_docs"] ) if rds_to_send: - # Cross-object query + # Cross-object alert hit["child_docs"] = rds_to_send results_to_send.append(hit) @@ -162,7 +165,6 @@ def query_and_send_alerts(rate): alert.query_run = search_params.urlencode() alert.date_last_hit = now() alert.save() - if hits: send_search_alert_emails.delay([(user.pk, hits)]) alerts_sent_count += 1 diff --git a/cl/alerts/templates/alert_email_es.html b/cl/alerts/templates/alert_email_es.html index 36ccee5ccc..804f33a0bb 100644 --- a/cl/alerts/templates/alert_email_es.html +++ b/cl/alerts/templates/alert_email_es.html @@ -25,7 +25,7 @@

- Your {{alert.get_rate_display|lower}} {% if type == 'o' %}opinion{% elif type == 'oa' %}oral argument{% endif %} alert — {{alert.name}} — had {{num_results}}{% if num_results >= hits_limit %}+{% endif %} hit{{results|pluralize}}: + Your {{alert.get_rate_display|lower}} {% if type == 'o' %}opinion{% elif type == 'oa' %}oral argument{% elif type == 'r' %}RECAP{% endif %} alert — {{alert.name}} — had {{num_results}}{% if num_results >= hits_limit %}+{% endif %} hit{{results|pluralize}}:

View Full Results / Edit this Alert
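As an aside, the `type` switch these template hunks introduce can be exercised in isolation; a minimal sketch using a standalone Django engine (the inline template string and context values are illustrative stand-ins, not the real email context, which renders `{{alert.get_rate_display|lower}}` through CourtListener's normal Django setup):

    import django
    from django.conf import settings
    from django.template import Context, Template

    # Standalone engine configuration, for illustration only.
    settings.configure(
        TEMPLATES=[{"BACKEND": "django.template.backends.django.DjangoTemplates"}]
    )
    django.setup()

    # Same branch structure the patch adds to alert_email_es.html.
    tpl = Template(
        "Your {{ rate }} {% if type == 'o' %}opinion"
        "{% elif type == 'oa' %}oral argument"
        "{% elif type == 'r' %}RECAP{% endif %} alert"
    )
    print(tpl.render(Context({"rate": "real time", "type": "r"})))
    # -> Your real time RECAP alert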
@@ -34,28 +34,65 @@

- + {{ forloop.counter }}. {{ result|get_highlight:"caseName"|safe }} - ({% if result.court_id != 'scotus' %}{{ result|get_highlight:"court_citation_string"|nbsp|safe }} {% endif %}{% if type == 'o' %}{{ result.dateFiled|date:"Y" }}{% elif type == 'oa' %}{{ result.dateArgued|date:"Y" }}{% endif %}) + ({% if result.court_id != 'scotus' %}{{ result|get_highlight:"court_citation_string"|nbsp|safe }} {% endif %}{% if type == 'o' %}{{ result.dateFiled|date:"Y" }}{% elif type == 'oa' %}{{ result.dateArgued|date:"Y" }}{% elif type == 'r' %}{{ result.dateFiled|date:"Y" }}{% endif %})

-

- - View original: - - {% if result.download_url %} - - From the court + {% if type == 'r' %} + {% if result.docketNumber %} + Docket Number: + {{ result|get_highlight:"docketNumber"|safe }} + {% endif %} +
+ Date Filed: + {% if result.dateFiled %} + {{ result.dateFiled|date:"F jS, Y" }} + {% else %} + Unknown Date + {% endif %} +

+ {% if result.child_remaining %} + {% extract_q_value alert.query_run as q_value %} + + View Additional Results for this Case + + {% endif %} + {% else %} +

+ + View original: + + {% if result.download_url %} + + From the court + +   |   + {% endif %} + {% if result.local_path %} + {# Provide link to S3. #} + + Our backup -   |   {% endif %} - {% if result.local_path %} - {# Provide link to S3. #} - - Our backup - +

{% endif %} -

{% if type == 'oa' %}

Date Argued: diff --git a/cl/alerts/templates/alert_email_es.txt b/cl/alerts/templates/alert_email_es.txt index db8363e713..f4aa763cec 100644 --- a/cl/alerts/templates/alert_email_es.txt +++ b/cl/alerts/templates/alert_email_es.txt @@ -10,16 +10,23 @@ CourtListener.com We have news regarding your alerts at CourtListener.com ------------------------------------------------------- -{% for alert, type, results, num_results in hits %}{% for result in results %}{% if forloop.first %}Your {{alert.get_rate_display|lower}} {% if type == 'o' %}opinion{% elif type == 'oa' %}oral argument{% endif %} alert -- {{alert.name}} -- had {{num_results}}{% if num_results >= hits_limit %}+{% endif %} hit{{results|pluralize}}: +{% for alert, type, results, num_results in hits %}{% for result in results %}{% if forloop.first %}Your {{alert.get_rate_display|lower}} {% if type == 'o' %}opinion{% elif type == 'oa' %}oral argument{% elif type == 'r' %}RECAP{% endif %} alert -- {{alert.name}} -- had {{num_results}}{% if num_results >= hits_limit %}+{% endif %} hit{{results|pluralize}}: ------------------------------------------------------- View Full Results / Edit this Alert: https://www.courtlistener.com/?{{ alert.query_run|safe }}&edit_alert={{ alert.pk }} Disable this Alert (one click): https://www.courtlistener.com{% url "disable_alert" alert.secret_key %}{% endif %} {{forloop.counter}}. {{ result.caseName|render_string_or_list|safe|striptags }} ({% if result.court_id != 'scotus' %}{{ result.court_citation_string|render_string_or_list|striptags }} {% endif %}{% if type == 'o' %}{{ result.dateFiled|date:"Y" }}{% elif type == 'oa' %}{{ result.dateArgued|date:"Y" }}{% endif %}) {% if type == 'oa' %}{% if result.dateArgued %}Date Argued: {{ result.dateArgued|date:"F jS, Y" }}{% else %}Date Argued: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %} | Duration: {{ result.duration|naturalduration }}{% if result.judge %} | Judge: {{ result.judge|render_string_or_list|safe|striptags|underscore_to_space }}{% endif %}{% endif %} -{% if type == 'o' or type == 'oa' %}{% if result|get_highlight:"text" %}...{{ result|get_highlight:"text"|safe|striptags|underscore_to_space|compress_whitespace }}...{% endif %} +{% if type == 'o' or type == 'oa' %}{% if result|get_highlight:"text" %}...{{ result|get_highlight:"text"|safe|striptags|underscore_to_space|compress_whitespace }}...{% endif %}{% endif %} +{% if type == 'r' %}{% if result.dateFiled %}Date Filed: {{ result.dateFiled|date:"F jS, Y" }}{% else %}Date Filed: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %} +{% for doc in result.child_docs %}{% with doc=doc|get_attrdict:"_source" %} - {% if doc.short_description %}{{ doc.short_description|render_string_or_list|safe|striptags }} - {% endif %}Document #{% if doc.document_number %}{{ doc.document_number }}{% endif %}{% if doc.attachment_number %}, Attachment #{{ doc.attachment_number }}{% endif %} + {% if doc.description %}Description: {{ doc.description|render_string_or_list|safe|striptags }}{% endif %} + {% if doc.plain_text %}{% contains_highlights doc.plain_text.0 True as highlighted %}{% if highlighted %}...{% endif %}{{ doc.plain_text|render_string_or_list|safe|striptags|underscore_to_space }}...{% endif %} + View this document on our site: https://www.courtlistener.com{% if doc.absolute_url %}{{ doc.absolute_url }}{% else %}{{ 
result.docket_absolute_url }}#minute-entry-{{ doc.docket_entry_id }}{% endif %} +{% endwith %}{% endfor %} +{% if result.child_remaining %}{% extract_q_value alert.query_run as q_value %}View Additional Results for this Case: https://www.courtlistener.com/?type={{ type|urlencode }}&q={% if q_value %}({{ q_value|urlencode }})%20AND%20{% endif %}docket_id%3A{{ result.docket_id|urlencode }}{% endif %} {% endif %}~~~~~ - - View this item on our site: https://www.courtlistener.com{{result.absolute_url}} + - View this item on our site: https://www.courtlistener.com{% if type == 'r' %}{{result.docket_absolute_url}}{% else %}{{result.absolute_url}}{% endif %} {% if result.download_url %} - Download original from the court: {{result.download_url}} {% endif %}{% if result.local_path %} - Download the original from our backup: https://storage.courtlistener.com/{{ result.local_path }}{% endif %}{% endfor %} diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index d3b345f36e..8927be1f12 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -6,6 +6,7 @@ from django.core import mail from django.core.management import call_command from django.utils.timezone import now +from lxml import html from cl.alerts.factories import AlertFactory from cl.alerts.models import SEARCH_TYPES, Alert @@ -75,6 +76,108 @@ def get_html_content_from_email(email_content): break return html_content + def _confirm_number_of_alerts(self, html_content, expected_count): + """Test the number of alerts included in the email alert.""" + tree = html.fromstring(html_content) + got = len(tree.xpath("//h2")) + + self.assertEqual( + got, + expected_count, + msg="Did not get the right number of alerts in the email. " + "Expected: %s - Got: %s\n\n" % (expected_count, got), + ) + + def _count_alert_hits_and_child_hits( + self, + html_content, + alert_title, + expected_hits, + case_title, + expected_child_hits, + ): + """Confirm the following assertions for the email alert: + - An specific alert is included in the email alert. + - The specified alert contains the expected number of hits. + - The specified case contains the expected number of child hits. + """ + tree = html.fromstring(html_content) + alert_element = tree.xpath(f"//h2[contains(text(), '{alert_title}')]") + self.assertTrue( + alert_element, msg=f"Not alert with title {alert_title} found." + ) + + # Find the corresponding case_title under the alert_element + alert_index = tree.xpath("//h2").index(alert_element[0]) + alert_cases = tree.xpath( + f"//h2[{alert_index + 1}]/following-sibling::h3" + ) + self.assertEqual( + len(alert_cases), + expected_hits, + msg="Did not get the right number of hits for the alert %s. " + "Expected: %s - Got: %s\n\n" + % (alert_title, expected_hits, len(alert_cases)), + ) + if case_title: + child_hit_count = 0 + for case in alert_cases: + case_text = " ".join(case.xpath(".//text()")).strip() + if case_title in case_text: + child_hit_count = len( + case.xpath("following-sibling::ul[1]/li/a") + ) + + self.assertEqual( + child_hit_count, + expected_child_hits, + msg="Did not get the right number of child hits for the case %s. " + "Expected: %s - Got: %s\n\n" + % (case_title, expected_child_hits, child_hit_count), + ) + + def _assert_child_hits_content( + self, + html_content, + alert_title, + case_title, + expected_child_descriptions, + ): + """Confirm the child hits in a case are the expected ones, comparing + their descriptions. 
+ """ + tree = html.fromstring(html_content) + alert_element = tree.xpath(f"//h2[contains(text(), '{alert_title}')]") + # Find the corresponding case_title under the alert_element + alert_index = tree.xpath("//h2").index(alert_element[0]) + alert_cases = tree.xpath( + f"//h2[{alert_index + 1}]/following-sibling::h3" + ) + + def extract_child_descriptions(case_item): + child_documents = case_item.xpath("//ul/li") + results = [] + for li in child_documents: + a_tag = li.xpath(".//a")[0] + full_text = a_tag.text_content() + first_part = full_text.split("\u2014")[0].strip() + results.append(first_part) + + return results + + child_descriptions = set() + for case in alert_cases: + case_text = "".join(case.xpath(".//text()")).strip() + if case_title in case_text: + child_descriptions = set(extract_child_descriptions(case)) + break + + self.assertEqual( + child_descriptions, + set(expected_child_descriptions), + msg=f"Child hits didn't match for case {case_title}", + ) + async def test_recap_document_hl_matched(self) -> None: """Test recap_document_hl_matched method that determines weather a hit contains RECAPDocument HL fields.""" @@ -244,8 +347,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: Assert the content structure accordingly. """ - # This docket-only alert, matches a Docket added today. - + # This docket-only alert matches a Docket ingested today. docket_only_alert = AlertFactory( user=self.user_profile.user, rate=Alert.REAL_TIME, @@ -265,15 +367,29 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: ) html_content = self.get_html_content_from_email(mail.outbox[0]) self.assertIn(docket_only_alert.name, html_content) + self._confirm_number_of_alerts(html_content, 1) + # The docket-only alert doesn't contain any nested child hits. + self._count_alert_hits_and_child_hits( + html_content, + docket_only_alert.name, + 1, + self.de.docket.case_name, + 0, + ) + + # Assert email text version: + txt_email = mail.outbox[0].body + self.assertIn(docket_only_alert.name, txt_email) - # This test shouldn't match the Docket-only alert when the RD is added - # today since its parent Docket was not modified today. + # The following test shouldn't match the Docket-only alert when the RD + # is added today since its parent Docket was not modified today. AlertFactory( user=self.user_profile_2.user, rate=Alert.REAL_TIME, name="Test Alert Docket Only Not Triggered", query='q="405 Civil"&type=r', ) + # Simulate docket is ingested a day before. one_day_before = now() - datetime.timedelta(days=1) mock_date = one_day_before.replace(hour=5) with time_machine.travel(mock_date, tick=False): @@ -289,16 +405,17 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: jury_demand="1,000,000", ) + # Its related RD is ingested today. 
mock_date = now().replace(hour=5) with time_machine.travel(mock_date, tick=False): - de = DocketEntryWithParentsFactory( + alert_de = DocketEntryWithParentsFactory( docket=docket, entry_number=1, date_filed=datetime.date(2024, 8, 19), description="MOTION for Leave to File Amicus Curiae Lorem Served", ) rd = RECAPDocumentFactory( - docket_entry=de, + docket_entry=alert_de, description="Motion to File", document_number="1", is_available=True, @@ -306,7 +423,6 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: pacer_doc_id="018036652436", plain_text="plain text for 018036652436", ) - call_command( "cl_index_parent_and_child_docs", search_type=SEARCH_TYPES.RECAP, @@ -323,11 +439,12 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: ), ), time_machine.travel(self.mock_date, tick=False): call_command("cl_send_recap_alerts") - # No new alert should be triggered. + # The RD ingestion's shouldn't match the docket-only alert. self.assertEqual( len(mail.outbox), 1, msg="Outgoing emails don't match." ) + # Test a RECAP-only alert query. recap_only_alert = AlertFactory( user=self.user_profile.user, rate=Alert.REAL_TIME, @@ -345,8 +462,29 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: self.assertEqual( len(mail.outbox), 2, msg="Outgoing emails don't match." ) + html_content = self.get_html_content_from_email(mail.outbox[1]) + self._confirm_number_of_alerts(html_content, 1) + # Only one child hit should be included in the case within the alert. + self._count_alert_hits_and_child_hits( + html_content, + recap_only_alert.name, + 1, + alert_de.docket.case_name, + 1, + ) + self._assert_child_hits_content( + html_content, + recap_only_alert.name, + alert_de.docket.case_name, + [rd.description], + ) + # Assert email text version: + txt_email = mail.outbox[1].body + self.assertIn(recap_only_alert.name, txt_email) + self.assertIn(rd.description, txt_email) - # Trigger the alert again. + # Trigger the same alert again to confirm that no new alert is + # triggered because previous hits have already triggered the same alert with mock.patch( "cl.api.webhooks.requests.post", side_effect=lambda *args, **kwargs: MockResponse( @@ -359,9 +497,10 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: len(mail.outbox), 2, msg="Outgoing emails don't match." ) - # Create a new RD for the same DocketEntry. - rd = RECAPDocumentFactory( - docket_entry=de, + # Create a new RD for the same DocketEntry to confirm this new RD is + # properly included in the alert email. + rd_2 = RECAPDocumentFactory( + docket_entry=alert_de, description="Motion to File 2", document_number="2", is_available=True, @@ -390,12 +529,22 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: self.assertEqual( len(mail.outbox), 3, msg="Outgoing emails don't match." ) + html_content = self.get_html_content_from_email(mail.outbox[2]) + self._confirm_number_of_alerts(html_content, 1) + self._assert_child_hits_content( + html_content, + recap_only_alert.name, + alert_de.docket.case_name, + [rd_2.description], + ) + # The following test confirms that hits previously matched with other + # alerts can match a different alert. 
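That alert-scoped dedup comes from the per-alert Redis set keys added in `cl/alerts/utils.py` (`alert_hits:{alert_id}.{document_type}`): membership is checked before a hit is sent and the hit is recorded afterwards, and because the alert ID is part of the key, the same document can still fire a different alert. A minimal sketch of that bookkeeping, assuming a local Redis instance and hypothetical IDs (the `"r"` document type for RECAPDocuments mirrors the `"d"` used for dockets and is illustrative here):

    from redis import Redis

    # Assumption: a local Redis on the default port; outside tests this
    # client comes from get_redis_interface("CACHE").
    r = Redis(host="localhost", port=6379, db=0)
    alert_a, alert_b, rd_pk = 1, 2, 42

    def key(alert_id: int, document_type: str) -> str:
        # Mirrors make_alert_set_key from cl/alerts/utils.py.
        return f"alert_hits:{alert_id}.{document_type}"

    # First run of alert_a: the document has not fired yet, so it is sent
    # and then recorded in the alert's set.
    assert not r.sismember(key(alert_a, "r"), rd_pk)
    r.sadd(key(alert_a, "r"), rd_pk)

    # Second run of alert_a: the membership check filters the document out,
    # so no new email goes out.
    assert r.sismember(key(alert_a, "r"), rd_pk)

    # alert_b keeps its own set, so the same document can still trigger it,
    # which is what the recap_only_alert_2 flow below verifies end to end.
    assert not r.sismember(key(alert_b, "r"), rd_pk)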
recap_only_alert_2 = AlertFactory( user=self.user_profile.user, rate=Alert.REAL_TIME, name="Test Alert RECAP Only Docket Entry", - query=f"q=docket_entry_id:{de.pk}&type=r", + query=f"q=docket_entry_id:{alert_de.pk}&type=r", ) with mock.patch( "cl.api.webhooks.requests.post", @@ -405,7 +554,135 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: ), time_machine.travel(self.mock_date, tick=False): call_command("cl_send_recap_alerts") - # A new alert should be triggered containing two RDs. + # A new alert should be triggered containing two RDs (rd and rd_2) self.assertEqual( len(mail.outbox), 4, msg="Outgoing emails don't match." ) + html_content = self.get_html_content_from_email(mail.outbox[3]) + self._confirm_number_of_alerts(html_content, 1) + self._assert_child_hits_content( + html_content, + recap_only_alert_2.name, + alert_de.docket.case_name, + [rd.description, rd_2.description], + ) + # Assert email text version: + txt_email = mail.outbox[3].body + self.assertIn(recap_only_alert.name, txt_email) + self.assertIn(rd.description, txt_email) + self.assertIn(rd_2.description, txt_email) + + # The following test confirms that a cross-object alert is properly + # matched and triggered + recap_only_alert_3 = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object query", + query=f'q="Motion to File 2"&docket_number={docket.docket_number}&type=r', + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts") + + # A new alert should be triggered containing one RD (rd_2) + self.assertEqual( + len(mail.outbox), 5, msg="Outgoing emails don't match." + ) + html_content = self.get_html_content_from_email(mail.outbox[4]) + self._confirm_number_of_alerts(html_content, 1) + self._assert_child_hits_content( + html_content, + recap_only_alert_3.name, + alert_de.docket.case_name, + [rd_2.description], + ) + # Assert email text version: + txt_email = mail.outbox[4].body + self.assertIn(recap_only_alert_3.name, txt_email) + self.assertIn(rd_2.description, txt_email) + + def test_limit_alert_case_child_hits(self) -> None: + """Test limit case child hits up to 5 and display the "View additional + results for this Case" button. + """ + + mock_date = now().replace(hour=5) + with time_machine.travel(mock_date, tick=False): + alert_de = DocketEntryWithParentsFactory( + docket=self.de.docket, + entry_number=1, + date_filed=datetime.date(2024, 8, 19), + description="MOTION for Leave to File Amicus Curiae Lorem Served", + ) + rd_descriptions = [] + for i in range(6): + rd = RECAPDocumentFactory( + docket_entry=alert_de, + description=f"Motion to File {i+1}", + document_number=f"{i+1}", + pacer_doc_id=f"018036652436{i+1}", + ) + if i < 5: + # Omit the last alert to compare. Only up to 5 should be + # included in the case. 
+                    rd_descriptions.append(rd.description)
+
+            call_command(
+                "cl_index_parent_and_child_docs",
+                search_type=SEARCH_TYPES.RECAP,
+                queue="celery",
+                pk_offset=0,
+                testing_mode=True,
+                sweep_index=True,
+            )
+        recap_only_alert = AlertFactory(
+            user=self.user_profile.user,
+            rate=Alert.REAL_TIME,
+            name="Test Alert RECAP Only Docket Entry",
+            query=f"q=docket_entry_id:{alert_de.pk}&type=r",
+        )
+        with mock.patch(
+            "cl.api.webhooks.requests.post",
+            side_effect=lambda *args, **kwargs: MockResponse(
+                200, mock_raw=True
+            ),
+        ), time_machine.travel(self.mock_date, tick=False):
+            call_command("cl_send_recap_alerts")
+
+        self.assertEqual(
+            len(mail.outbox), 1, msg="Outgoing emails don't match."
+        )
+        html_content = self.get_html_content_from_email(mail.outbox[0])
+        self.assertIn(recap_only_alert.name, html_content)
+        self._confirm_number_of_alerts(html_content, 1)
+        # The docket-only alert doesn't contain any nested child hits.
+        self._count_alert_hits_and_child_hits(
+            html_content,
+            recap_only_alert.name,
+            1,
+            self.de.docket.case_name,
+            5,
+        )
+        self._assert_child_hits_content(
+            html_content,
+            recap_only_alert.name,
+            alert_de.docket.case_name,
+            rd_descriptions,
+        )
+        # Assert the View more results button is present in the alert.
+        self.assertIn("View Additional Results for this Case", html_content)
+
+        # Assert email text version:
+        txt_email = mail.outbox[0].body
+        self.assertIn(recap_only_alert.name, txt_email)
+        for description in rd_descriptions:
+            with self.subTest(
+                description=description, msg="Plain text descriptions"
+            ):
+                self.assertIn(description, txt_email)
+
+        self.assertIn("View Additional Results for this Case", txt_email)
diff --git a/cl/alerts/utils.py b/cl/alerts/utils.py
index 677898a688..44277a04a2 100644
--- a/cl/alerts/utils.py
+++ b/cl/alerts/utils.py
@@ -154,7 +154,7 @@ def recap_document_hl_matched(rd_hit: Hit) -> bool:
     :return: True if the hit matched a RECAPDocument field. Otherwise, False.
     """
 
-    matched_rd_hl = set()
+    matched_rd_hl: set[str] = set()
     rd_hl_fields = set(SEARCH_RECAP_CHILD_HL_FIELDS.keys())
     if hasattr(rd_hit, "highlight"):
         highlights = rd_hit.highlight.to_dict()
diff --git a/cl/custom_filters/templatetags/extras.py b/cl/custom_filters/templatetags/extras.py
index 40d2813cda..53cc1a2b71 100644
--- a/cl/custom_filters/templatetags/extras.py
+++ b/cl/custom_filters/templatetags/extras.py
@@ -1,5 +1,6 @@
 import random
 import re
+import urllib.parse
 
 from django import template
 from django.core.exceptions import ValidationError
@@ -10,6 +11,7 @@
 from django.utils.safestring import SafeString, mark_safe
 from elasticsearch_dsl import AttrDict, AttrList
 
+from cl.search.constants import ALERTS_HL_TAG, SEARCH_HL_TAG
 from cl.search.models import Docket, DocketEntry
 
 register = template.Library()
@@ -198,13 +200,15 @@ def citation(obj) -> SafeString:
 
 
 @register.simple_tag
-def contains_highlights(content: str) -> bool:
+def contains_highlights(content: str, alert: bool = False) -> bool:
     """Check if a given string contains the mark tag used in highlights.
 
     :param content: The input string to check.
+    :param alert: Whether this tag is being used in the alert template.
    :return: True if the mark highlight tag is found, otherwise False.
    """
-    pattern = r"<mark>.*?</mark>"
+    hl_tag = ALERTS_HL_TAG if alert else SEARCH_HL_TAG
+    pattern = rf"<{hl_tag}>.*?</{hl_tag}>"
matches = re.findall(pattern, content) return bool(matches) @@ -243,3 +247,15 @@ def get_highlight(result: AttrDict | dict[str, any], field: str) -> any: original_value = result.get(field, "") return render_string_or_list(hl_value) if hl_value else original_value + + +@register.simple_tag +def extract_q_value(query: str) -> str: + """Extract the value of the "q" parameter from a URL-encoded query string. + + :param query: The URL-encoded query string. + :return: The value of the "q" parameter or an empty string if "q" is not found. + """ + + parsed_query = urllib.parse.parse_qs(query) + return parsed_query.get("q", [""])[0] From 51c7bb65f1f962e315c337825831ec2e405b3f0b Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Fri, 28 Jun 2024 15:43:55 -0600 Subject: [PATCH 07/33] fix(alerts): Group alerts and case hits limit --- .../commands/cl_send_recap_alerts.py | 2 - cl/alerts/tests/tests_recap_alerts.py | 214 ++++++++++++++++-- cl/lib/elasticsearch_utils.py | 4 +- 3 files changed, 201 insertions(+), 19 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index fd8d3a13c9..bdaae5bfb1 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -125,8 +125,6 @@ def query_and_send_alerts(rate): for hit in results: if not includes_rd_fields: # Possible Docket-only alert - # TODO important to keep the original ES child structure to preserve HLs. - # Maybe we can merge HL after filtering them? rds_to_send = filter_rd_alert_hits( r, alert.pk, hit["child_docs"], check_rd_hl=True ) diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index 8927be1f12..32eac189ec 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -5,6 +5,7 @@ from asgiref.sync import sync_to_async from django.core import mail from django.core.management import call_command +from django.test.utils import override_settings from django.utils.timezone import now from lxml import html @@ -88,6 +89,28 @@ def _confirm_number_of_alerts(self, html_content, expected_count): "Expected: %s - Got: %s\n\n" % (expected_count, got), ) + @staticmethod + def _extract_cases_from_alert(html_tree, alert_title): + """Extract the case elements (h3) under a specific alert (h2) from the + HTML tree. + """ + alert_element = html_tree.xpath( + f"//h2[contains(text(), '{alert_title}')]" + ) + h2_elements = html_tree.xpath("//h2") + alert_index = h2_elements.index(alert_element[0]) + # Find the
<h3> elements between this <h2> and the next <h2>
+ if alert_index + 1 < len(h2_elements): + next_alert_element = h2_elements[alert_index + 1] + alert_cases = html_tree.xpath( + f"//h2[contains(text(), '{alert_title}')]/following-sibling::*[following-sibling::h2[1] = '{next_alert_element.text}'][self::h3]" + ) + else: + alert_cases = html_tree.xpath( + f"//h2[contains(text(), '{alert_title}')]/following-sibling::h3" + ) + return alert_cases + def _count_alert_hits_and_child_hits( self, html_content, @@ -107,11 +130,8 @@ def _count_alert_hits_and_child_hits( alert_element, msg=f"Not alert with title {alert_title} found." ) - # Find the corresponding case_title under the alert_element - alert_index = tree.xpath("//h2").index(alert_element[0]) - alert_cases = tree.xpath( - f"//h2[{alert_index + 1}]/following-sibling::h3" - ) + alert_cases = self._extract_cases_from_alert(tree, alert_title) + self.assertEqual( len(alert_cases), expected_hits, @@ -149,13 +169,10 @@ def _assert_child_hits_content( tree = html.fromstring(html_content) alert_element = tree.xpath(f"//h2[contains(text(), '{alert_title}')]") # Find the corresponding case_title under the alert_element - alert_index = tree.xpath("//h2").index(alert_element[0]) - alert_cases = tree.xpath( - f"//h2[{alert_index + 1}]/following-sibling::h3" - ) + alert_cases = self._extract_cases_from_alert(tree, alert_title) def extract_child_descriptions(case_item): - child_documents = case_item.xpath("//ul/li") + child_documents = case_item.xpath("./following-sibling::ul[1]/li") results = [] for li in child_documents: a_tag = li.xpath(".//a")[0] @@ -175,7 +192,7 @@ def extract_child_descriptions(case_item): self.assertEqual( child_descriptions, set(expected_child_descriptions), - msg=f"Child hits didn't match for case {case_title}", + msg=f"Child hits didn't match for case {case_title}, Got {child_descriptions}, Expected: {expected_child_descriptions} ", ) async def test_recap_document_hl_matched(self) -> None: @@ -574,7 +591,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: # The following test confirms that a cross-object alert is properly # matched and triggered - recap_only_alert_3 = AlertFactory( + cross_object_alert = AlertFactory( user=self.user_profile.user, rate=Alert.REAL_TIME, name="Test Alert Cross-object query", @@ -596,13 +613,13 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: self._confirm_number_of_alerts(html_content, 1) self._assert_child_hits_content( html_content, - recap_only_alert_3.name, + cross_object_alert.name, alert_de.docket.case_name, [rd_2.description], ) # Assert email text version: txt_email = mail.outbox[4].body - self.assertIn(recap_only_alert_3.name, txt_email) + self.assertIn(cross_object_alert.name, txt_email) self.assertIn(rd_2.description, txt_email) def test_limit_alert_case_child_hits(self) -> None: @@ -659,7 +676,7 @@ def test_limit_alert_case_child_hits(self) -> None: html_content = self.get_html_content_from_email(mail.outbox[0]) self.assertIn(recap_only_alert.name, html_content) self._confirm_number_of_alerts(html_content, 1) - # The docket-only alert doesn't contain any nested child hits. + # The case alert should contain up to 5 child hits. 
self._count_alert_hits_and_child_hits( html_content, recap_only_alert.name, @@ -683,6 +700,171 @@ def test_limit_alert_case_child_hits(self) -> None: with self.subTest( description=description, msg="Plain text descriptions" ): - self.assertIn(description, txt_email) + self.assertIn( + description, + txt_email, + msg="RECAPDocument wasn't found in the email content.", + ) self.assertIn("View Additional Results for this Case", txt_email) + + @override_settings(SCHEDULED_ALERT_HITS_LIMIT=3) + def test_multiple_alerts_email_hits_limit_per_alert(self) -> None: + """Test multiple alerts can be grouped in an email and hits within an + alert are limited to SCHEDULED_ALERT_HITS_LIMIT (3) hits. + """ + + docket = DocketFactory( + court=self.court, + case_name=f"SUBPOENAS SERVED CASE", + docket_number=f"1:21-bk-123", + source=Docket.RECAP, + cause="410 Civil", + ) + for i in range(3): + DocketFactory( + court=self.court, + case_name=f"SUBPOENAS SERVED CASE {i}", + docket_number=f"1:21-bk-123{i}", + source=Docket.RECAP, + cause="410 Civil", + ) + + alert_de = DocketEntryWithParentsFactory( + docket=docket, + entry_number=1, + date_filed=datetime.date(2024, 8, 19), + description="MOTION for Leave to File Amicus Curiae Lorem Served", + ) + rd = RECAPDocumentFactory( + docket_entry=alert_de, + description="Motion to File", + document_number="1", + pacer_doc_id="018036652439", + ) + rd_2 = RECAPDocumentFactory( + docket_entry=alert_de, + description="Motion to File 2", + document_number="2", + pacer_doc_id="018036652440", + plain_text= "plain text lorem" + ) + + docket_only_alert = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Docket Only", + query='q="410 Civil"&type=r', + ) + recap_only_alert = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert RECAP Only Docket Entry", + query=f"q=docket_entry_id:{alert_de.pk}&type=r", + ) + cross_object_alert_with_hl = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object", + query=f'q="File Amicus Curiae" AND "Motion to File 2" AND ' + f'"plain text lorem" AND "410 Civil" AND ' + f'id:{rd_2.pk}&docket_number={docket.docket_number}' + f'&case_name="{docket.case_name}"&type=r', + ) + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + sweep_index=True, + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts") + + self.assertEqual( + len(mail.outbox), 1, msg="Outgoing emails don't match." + ) + + # Assert docket-only alert. + html_content = self.get_html_content_from_email(mail.outbox[0]) + self.assertIn(docket_only_alert.name, html_content) + self._confirm_number_of_alerts(html_content, 3) + # The docket-only alert doesn't contain any nested child hits. + self._count_alert_hits_and_child_hits( + html_content, + docket_only_alert.name, + 3, + self.de.docket.case_name, + 0, + ) + + # Assert RECAP-only alert. + self.assertIn(recap_only_alert.name, html_content) + # The recap-only alert contain 2 child hits. 
+ self._count_alert_hits_and_child_hits( + html_content, + recap_only_alert.name, + 1, + alert_de.docket.case_name, + 2, + ) + self._assert_child_hits_content( + html_content, + recap_only_alert.name, + alert_de.docket.case_name, + [rd.description, rd_2.description], + ) + + # Assert Cross-object alert. + self.assertIn(recap_only_alert.name, html_content) + # The cross-object alert only contain 1 child hit. + self._count_alert_hits_and_child_hits( + html_content, + cross_object_alert_with_hl.name, + 1, + alert_de.docket.case_name, + 1, + ) + self._assert_child_hits_content( + html_content, + cross_object_alert_with_hl.name, + alert_de.docket.case_name, + [rd_2.description], + ) + + # Assert HL in the cross_object_alert_with_hl + self.assertIn(f"{docket.case_name}", html_content) + self.assertEqual(html_content.count(f"{docket.case_name}"), 1) + self.assertIn(f"{docket.docket_number}", html_content) + self.assertEqual( + html_content.count(f"{docket.docket_number}"), 1) + self.assertIn(f"{rd_2.plain_text}", html_content) + self.assertEqual( + html_content.count(f"{rd_2.plain_text}"), 1) + self.assertIn(f"{rd_2.description}", html_content) + self.assertEqual( + html_content.count(f"{rd_2.description}"), 1) + self.assertIn("File Amicus Curiae", html_content) + self.assertEqual( + html_content.count("File Amicus Curiae"), 1) + + # Assert email text version: + txt_email = mail.outbox[0].body + self.assertIn(recap_only_alert.name, txt_email) + self.assertIn(docket_only_alert.name, txt_email) + self.assertIn(cross_object_alert_with_hl.name, txt_email) + for description in [rd.description, rd_2.description]: + with self.subTest( + description=description, msg="Plain text descriptions" + ): + self.assertIn( + description, + txt_email, + msg="RECAPDocument wasn't found in the email content.", + ) diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index b4eab7c64b..9f6447c0e1 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -3053,7 +3053,9 @@ def do_es_sweep_alert_query( main_query = add_es_highlighting(s, cd, alerts=True) main_query = main_query.sort(build_sort_results(cd)) - main_query = main_query.extra(from_=0, size=30) + main_query = main_query.extra( + from_=0, size=settings.SCHEDULED_ALERT_HITS_LIMIT + ) results = main_query.execute() if results: total_hits = results.hits.total.value From 7fc3298e7f3d290ac2aefd87f299df082fc3235e Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Fri, 28 Jun 2024 19:21:45 -0600 Subject: [PATCH 08/33] fix(alerts): Trigger RECAP search alerts webhooks --- .../commands/cl_send_recap_alerts.py | 13 +++ cl/alerts/tasks.py | 2 +- cl/alerts/tests/tests_recap_alerts.py | 100 +++++++++++++++--- cl/api/tasks.py | 47 ++++++-- cl/lib/elasticsearch_utils.py | 1 - 5 files changed, 139 insertions(+), 24 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index bdaae5bfb1..d4a533c69d 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -16,6 +16,8 @@ query_includes_rd_field, recap_document_hl_matched, ) +from cl.api.models import WebhookEventType +from cl.api.tasks import send_es_search_alert_webhook from cl.lib.command_utils import VerboseCommand, logger from cl.lib.elasticsearch_utils import do_es_sweep_alert_query from cl.lib.redis_utils import get_redis_interface @@ -163,6 +165,17 @@ def query_and_send_alerts(rate): alert.query_run = search_params.urlencode() 
alert.date_last_hit = now() alert.save() + + # Send webhook event if the user has a SEARCH_ALERT + # endpoint enabled. + user_webhooks = user.webhooks.filter( + event_type=WebhookEventType.SEARCH_ALERT, enabled=True + ) + for user_webhook in user_webhooks: + send_es_search_alert_webhook.delay( + results_to_send, user_webhook.pk, alert.pk + ) + if hits: send_search_alert_emails.delay([(user.pk, hits)]) alerts_sent_count += 1 diff --git a/cl/alerts/tasks.py b/cl/alerts/tasks.py index da43d8155d..037fe22b4c 100644 --- a/cl/alerts/tasks.py +++ b/cl/alerts/tasks.py @@ -461,7 +461,7 @@ def send_webhook_alert_hits( send_es_search_alert_webhook.delay( documents, user_webhook.pk, - alert, + alert.pk, ) diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index 32eac189ec..d910c67272 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -6,6 +6,7 @@ from django.core import mail from django.core.management import call_command from django.test.utils import override_settings +from django.utils.html import strip_tags from django.utils.timezone import now from lxml import html @@ -13,7 +14,7 @@ from cl.alerts.models import SEARCH_TYPES, Alert from cl.alerts.utils import query_includes_rd_field, recap_document_hl_matched from cl.api.factories import WebhookFactory -from cl.api.models import WebhookEventType +from cl.api.models import WebhookEvent, WebhookEventType from cl.donate.models import NeonMembership from cl.lib.elasticsearch_utils import do_es_sweep_alert_query from cl.lib.test_helpers import RECAPSearchTestCase @@ -195,6 +196,38 @@ def extract_child_descriptions(case_item): msg=f"Child hits didn't match for case {case_title}, Got {child_descriptions}, Expected: {expected_child_descriptions} ", ) + def _count_webhook_hits_and_child_hits( + self, + webhooks, + alert_title, + expected_hits, + case_title, + expected_child_hits, + ): + """Confirm the following assertions for the search alert webhook: + - An specific alert webhook was triggered. + - The specified alert contains the expected number of hits. + - The specified case contains the expected number of child hits. + """ + + for webhook in webhooks: + if webhook["payload"]["alert"]["name"] == alert_title: + webhook_cases = webhook["payload"]["results"] + self.assertEqual( + len(webhook_cases), + expected_hits, + msg=f"Did not get the right number of hits for the alert %s. " + % alert_title, + ) + for case in webhook["payload"]["results"]: + if case_title == strip_tags(case["caseName"]): + self.assertEqual( + len(case["recap_documents"]), + expected_child_hits, + msg=f"Did not get the right number of child documents for the case %s. 
" + % case_title, + ) + async def test_recap_document_hl_matched(self) -> None: """Test recap_document_hl_matched method that determines weather a hit contains RECAPDocument HL fields.""" @@ -747,7 +780,7 @@ def test_multiple_alerts_email_hits_limit_per_alert(self) -> None: description="Motion to File 2", document_number="2", pacer_doc_id="018036652440", - plain_text= "plain text lorem" + plain_text="plain text lorem", ) docket_only_alert = AlertFactory( @@ -767,10 +800,20 @@ def test_multiple_alerts_email_hits_limit_per_alert(self) -> None: rate=Alert.REAL_TIME, name="Test Alert Cross-object", query=f'q="File Amicus Curiae" AND "Motion to File 2" AND ' - f'"plain text lorem" AND "410 Civil" AND ' - f'id:{rd_2.pk}&docket_number={docket.docket_number}' - f'&case_name="{docket.case_name}"&type=r', + f'"plain text lorem" AND "410 Civil" AND ' + f"id:{rd_2.pk}&docket_number={docket.docket_number}" + f'&case_name="{docket.case_name}"&type=r', ) + AlertFactory( + user=self.user_profile_2.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object", + query=f'q="File Amicus Curiae" AND "Motion to File 2" AND ' + f'"plain text lorem" AND "410 Civil" AND ' + f"id:{rd_2.pk}&docket_number={docket.docket_number}" + f'&case_name="{docket.case_name}"&type=r', + ) + call_command( "cl_index_parent_and_child_docs", search_type=SEARCH_TYPES.RECAP, @@ -788,8 +831,14 @@ def test_multiple_alerts_email_hits_limit_per_alert(self) -> None: call_command("cl_send_recap_alerts") self.assertEqual( - len(mail.outbox), 1, msg="Outgoing emails don't match." + len(mail.outbox), 2, msg="Outgoing emails don't match." + ) + + # Assert webhooks. + webhook_events = WebhookEvent.objects.all().values_list( + "content", flat=True ) + self.assertEqual(len(webhook_events), 3) # Assert docket-only alert. 
html_content = self.get_html_content_from_email(mail.outbox[0]) @@ -800,7 +849,14 @@ def test_multiple_alerts_email_hits_limit_per_alert(self) -> None: html_content, docket_only_alert.name, 3, - self.de.docket.case_name, + docket.case_name, + 0, + ) + self._count_webhook_hits_and_child_hits( + list(webhook_events), + docket_only_alert.name, + 3, + docket.case_name, 0, ) @@ -814,6 +870,13 @@ def test_multiple_alerts_email_hits_limit_per_alert(self) -> None: alert_de.docket.case_name, 2, ) + self._count_webhook_hits_and_child_hits( + list(webhook_events), + recap_only_alert.name, + 1, + alert_de.docket.case_name, + 2, + ) self._assert_child_hits_content( html_content, recap_only_alert.name, @@ -831,6 +894,13 @@ def test_multiple_alerts_email_hits_limit_per_alert(self) -> None: alert_de.docket.case_name, 1, ) + self._count_webhook_hits_and_child_hits( + list(webhook_events), + cross_object_alert_with_hl.name, + 1, + alert_de.docket.case_name, + 1, + ) self._assert_child_hits_content( html_content, cross_object_alert_with_hl.name, @@ -840,19 +910,25 @@ def test_multiple_alerts_email_hits_limit_per_alert(self) -> None: # Assert HL in the cross_object_alert_with_hl self.assertIn(f"{docket.case_name}", html_content) - self.assertEqual(html_content.count(f"{docket.case_name}"), 1) + self.assertEqual( + html_content.count(f"{docket.case_name}"), 1 + ) self.assertIn(f"{docket.docket_number}", html_content) self.assertEqual( - html_content.count(f"{docket.docket_number}"), 1) + html_content.count(f"{docket.docket_number}"), 1 + ) self.assertIn(f"{rd_2.plain_text}", html_content) self.assertEqual( - html_content.count(f"{rd_2.plain_text}"), 1) + html_content.count(f"{rd_2.plain_text}"), 1 + ) self.assertIn(f"{rd_2.description}", html_content) self.assertEqual( - html_content.count(f"{rd_2.description}"), 1) + html_content.count(f"{rd_2.description}"), 1 + ) self.assertIn("File Amicus Curiae", html_content) self.assertEqual( - html_content.count("File Amicus Curiae"), 1) + html_content.count("File Amicus Curiae"), 1 + ) # Assert email text version: txt_email = mail.outbox[0].body diff --git a/cl/api/tasks.py b/cl/api/tasks.py index b70420fe95..a0d6112444 100644 --- a/cl/api/tasks.py +++ b/cl/api/tasks.py @@ -1,6 +1,8 @@ import json +from collections import defaultdict from typing import Any +from elasticsearch_dsl.response import Hit from rest_framework.renderers import JSONRenderer from cl.alerts.api_serializers import SearchAlertSerializerModel @@ -10,9 +12,12 @@ from cl.api.webhooks import send_webhook_event from cl.celery_init import app from cl.corpus_importer.api_serializers import DocketEntrySerializer -from cl.search.api_serializers import V3OAESResultSerializer +from cl.search.api_serializers import ( + RECAPESResultSerializer, + V3OAESResultSerializer, +) from cl.search.api_utils import ResultObject -from cl.search.models import DocketEntry +from cl.search.models import SEARCH_TYPES, DocketEntry @app.task() @@ -79,25 +84,47 @@ def send_docket_alert_webhook_events( @app.task() def send_es_search_alert_webhook( - results: list[dict[str, Any]], + results: list[dict[str, Any]] | list[Hit], webhook_pk: int, - alert: Alert, + alert_pk: int, ) -> None: """Send a search alert webhook event containing search results from a search alert object. :param results: The search results returned by SOLR for this alert. :param webhook_pk: The webhook endpoint ID object to send the event to. - :param alert: The search alert object. + :param alert_pk: The search alert ID. 
""" webhook = Webhook.objects.get(pk=webhook_pk) + alert = Alert.objects.get(pk=alert_pk) serialized_alert = SearchAlertSerializerModel(alert).data - es_results = [] - for result in results: - result["snippet"] = result["text"] - es_results.append(ResultObject(initial=result)) - serialized_results = V3OAESResultSerializer(es_results, many=True).data + match alert.alert_type: + case SEARCH_TYPES.ORAL_ARGUMENT: + es_results = [] + for result in results: + result["snippet"] = result["text"] + es_results.append(ResultObject(initial=result)) + serialized_results = V3OAESResultSerializer( + es_results, many=True + ).data + case SEARCH_TYPES.RECAP: + for result in results: + child_result_objects = [] + if hasattr(result, "child_docs"): + for child_doc in result.child_docs: + child_result_objects.append( + defaultdict( + lambda: None, child_doc["_source"].to_dict() + ) + ) + result["child_docs"] = child_result_objects + serialized_results = RECAPESResultSerializer( + results, many=True + ).data + case _: + # No implemented alert type. + return None post_content = { "webhook": generate_webhook_key_content(webhook), diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 9f6447c0e1..b51d149ff6 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -4,7 +4,6 @@ import re import time import traceback -from collections import defaultdict from copy import deepcopy from dataclasses import fields from functools import reduce, wraps From b5016ba122caed7de1d207622db3fefcd30fadda Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Fri, 28 Jun 2024 21:07:55 -0600 Subject: [PATCH 09/33] fix(alerts): Schedule wly and mly RECAP Search Alerts --- .../commands/cl_send_recap_alerts.py | 244 ++++++++++++------ cl/alerts/tests/tests_recap_alerts.py | 59 ++++- 2 files changed, 221 insertions(+), 82 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index d4a533c69d..a00d4cf148 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -1,3 +1,4 @@ +import copy import datetime import traceback @@ -6,12 +7,14 @@ from django.http import QueryDict from django.utils.timezone import now from elasticsearch.exceptions import RequestError, TransportError +from elasticsearch_dsl.response import Hit from redis import Redis -from cl.alerts.models import Alert +from cl.alerts.models import Alert, ScheduledAlertHit from cl.alerts.tasks import send_search_alert_emails from cl.alerts.utils import ( add_document_hit_to_alert_set, + alert_hits_limit_reached, has_document_alert_hit_been_triggered, query_includes_rd_field, recap_document_hl_matched, @@ -29,6 +32,7 @@ ) from cl.search.models import SEARCH_TYPES, Docket from cl.stats.utils import tally_stat +from cl.users.models import UserProfile def index_daily_recap_documents(): @@ -57,7 +61,7 @@ def should_docket_hit_be_included( return False -def filter_rd_alert_hits(r, alert_id, rd_hits, check_rd_hl=False): +def filter_rd_alert_hits(r: Redis, alert_id: int, rd_hits, check_rd_hl=False): """Filter RECAP document hits based on specified conditions. :param r: The Redis interface. 
@@ -85,9 +89,98 @@ def filter_rd_alert_hits(r, alert_id, rd_hits, check_rd_hl=False): return rds_to_send -def query_and_send_alerts(rate): +def query_alerts( + search_params: QueryDict, +) -> tuple[list[Hit] | None, int | None]: + try: + search_query = DocketSweepDocument.search() + return do_es_sweep_alert_query( + search_query, + search_params, + ) + except ( + UnbalancedParenthesesQuery, + UnbalancedQuotesQuery, + BadProximityQuery, + TransportError, + ConnectionError, + RequestError, + ): + traceback.print_exc() + logger.info(f"Search for this alert failed: {search_params}\n") + return None, None + + +def process_alert_hits( + r: Redis, results: list[Hit], search_params: QueryDict, alert_id: int +) -> list[Hit]: + """Process alert hits by filtering and prepare the results to send based + on alert conditions. + + :param r: The Redis instance. + :param results: A list of Hit objects containing search results. + :param search_params: Query parameters used for the search. + :param alert_id: The ID of the alert being processed. + :return: A list of Hit objects that are filtered and prepared to be sent. + """ + + includes_rd_fields = query_includes_rd_field(search_params) + results_to_send = [] + if len(results) > 0: + for hit in results: + if not includes_rd_fields: + # Possible Docket-only alert + rds_to_send = filter_rd_alert_hits( + r, alert_id, hit["child_docs"], check_rd_hl=True + ) + if rds_to_send: + # Cross-object query + hit["child_docs"] = rds_to_send + results_to_send.append(hit) + elif should_docket_hit_be_included(r, alert_id, hit.docket_id): + # Docket-only alert + hit["child_docs"] = [] + results_to_send.append(hit) + add_document_hit_to_alert_set( + r, alert_id, "d", hit.docket_id + ) + else: + # RECAP-only alerts or cross-object alerts + rds_to_send = filter_rd_alert_hits( + r, alert_id, hit["child_docs"] + ) + if rds_to_send: + # Cross-object alert + hit["child_docs"] = rds_to_send + results_to_send.append(hit) + return results_to_send + + +def send_search_alert_webhooks( + user: UserProfile.user, results_to_send: list[Hit], alert_id: int +) -> None: + """Send webhook events for search alerts if the user has SEARCH_ALERT + endpoints enabled. + + :param user: The user object whose webhooks need to be checked. + :param results_to_send: A list of Hit objects that contain the search + results to be sent. + :param alert_id: The Alert ID to be sent in the webhook. 
+ """ + user_webhooks = user.webhooks.filter( + event_type=WebhookEventType.SEARCH_ALERT, enabled=True + ) + for user_webhook in user_webhooks: + send_es_search_alert_webhook.delay( + results_to_send, user_webhook.pk, alert_id + ) + + +def query_and_send_alerts(rate: str) -> None: r = get_redis_interface("CACHE") - alert_users = User.objects.filter(alerts__rate=rate).distinct() + alert_users: UserProfile.user = User.objects.filter( + alerts__rate=rate + ).distinct() alerts_sent_count = 0 now_time = datetime.datetime.now() for user in alert_users: @@ -101,80 +194,29 @@ def query_and_send_alerts(rate): alerts_to_update = [] for alert in alerts: search_params = QueryDict(alert.query.encode(), mutable=True) - includes_rd_fields = query_includes_rd_field(search_params) - try: - search_query = DocketSweepDocument.search() - results, total_hits = do_es_sweep_alert_query( - search_query, - search_params, - ) - except ( - UnbalancedParenthesesQuery, - UnbalancedQuotesQuery, - BadProximityQuery, - TransportError, - ConnectionError, - RequestError, - ): - traceback.print_exc() - logger.info(f"Search for this alert failed: {alert.query}\n") + results, _ = query_alerts(search_params) + if not results: continue - alerts_to_update.append(alert.pk) - if len(results) > 0: - search_type = search_params.get("type", SEARCH_TYPES.RECAP) - results_to_send = [] - for hit in results: - if not includes_rd_fields: - # Possible Docket-only alert - rds_to_send = filter_rd_alert_hits( - r, alert.pk, hit["child_docs"], check_rd_hl=True - ) - if rds_to_send: - # Cross-object query - hit["child_docs"] = rds_to_send - results_to_send.append(hit) - elif should_docket_hit_be_included( - r, alert.pk, hit.docket_id - ): - # Docket-only alert - hit["child_docs"] = [] - results_to_send.append(hit) - add_document_hit_to_alert_set( - r, alert.pk, "d", hit.docket_id - ) - else: - # RECAP-only alerts or cross-object alerts - rds_to_send = filter_rd_alert_hits( - r, alert.pk, hit["child_docs"] - ) - if rds_to_send: - # Cross-object alert - hit["child_docs"] = rds_to_send - results_to_send.append(hit) - - if results_to_send: - hits.append( - [ - alert, - search_type, - results_to_send, - len(results_to_send), - ] - ) - alert.query_run = search_params.urlencode() - alert.date_last_hit = now() - alert.save() - - # Send webhook event if the user has a SEARCH_ALERT - # endpoint enabled. 
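For context on the loop above: each alert's query is persisted as a URL-encoded string, so every run re-parses it into a mutable `QueryDict` before querying ES, and `urlencode()` round-trips it back into `alert.query_run`. A standalone sketch of that round-trip (the stored query value is hypothetical, and `settings.configure()` merely stands in for the command's normal Django setup):

    from django.conf import settings

    settings.configure()  # QueryDict needs settings when used standalone

    from django.http import QueryDict

    stored = 'q="401 Civil"&type=r'  # hypothetical Alert.query value
    params = QueryDict(stored.encode(), mutable=True)
    assert params.get("type") == "r"
    assert params.get("q") == '"401 Civil"'
    params["order_by"] = "score desc"  # mutable=True permits normalization
    print(params.urlencode())  # percent-encoded form saved to query_run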
- user_webhooks = user.webhooks.filter( - event_type=WebhookEventType.SEARCH_ALERT, enabled=True - ) - for user_webhook in user_webhooks: - send_es_search_alert_webhook.delay( - results_to_send, user_webhook.pk, alert.pk - ) + search_type = search_params.get("type", SEARCH_TYPES.RECAP) + results_to_send = process_alert_hits( + r, results, search_params, alert.pk + ) + if results_to_send: + hits.append( + [ + alert, + search_type, + results_to_send, + len(results_to_send), + ] + ) + alert.query_run = search_params.urlencode() # type: ignore + alert.date_last_hit = now() + alert.save() + + # Send webhooks + send_search_alert_webhooks(user, results_to_send, alert.pk) if hits: send_search_alert_emails.delay([(user.pk, hits)]) @@ -188,9 +230,50 @@ def query_and_send_alerts(rate): logger.info(f"Sent {alerts_sent_count} {rate} email alerts.") -def query_and_schedule_wly_and_mly_alerts(): - # TODO implement - pass +def query_and_schedule_alerts(rate: str): + r = get_redis_interface("CACHE") + alert_users = User.objects.filter(alerts__rate=rate).distinct() + for user in alert_users: + alerts = user.alerts.filter(rate=rate, alert_type=SEARCH_TYPES.RECAP) + logger.info(f"Running '{rate}' alerts for user '{user}': {alerts}") + scheduled_hits_to_create = [] + for alert in alerts: + search_params = QueryDict(alert.query.encode(), mutable=True) + results, _ = query_alerts(search_params) + if not results: + continue + results_to_send = process_alert_hits( + r, results, search_params, alert.pk + ) + if results_to_send: + for hit in results_to_send: + # Schedule DAILY, WEEKLY and MONTHLY Alerts + if alert_hits_limit_reached(alert.pk, user.pk): + # Skip storing hits for this alert-user combination because + # the SCHEDULED_ALERT_HITS_LIMIT has been reached. + continue + + child_result_objects = [] + hit_copy = copy.deepcopy(hit) + if hasattr(hit_copy, "child_docs"): + for child_doc in hit_copy.child_docs: + child_result_objects.append( + child_doc["_source"].to_dict() + ) + hit_copy["child_docs"] = child_result_objects + scheduled_hits_to_create.append( + ScheduledAlertHit( + user=user, + alert=alert, + document_content=hit_copy.to_dict(), + ) + ) + # Send webhooks + send_search_alert_webhooks(user, results_to_send, alert.pk) + + # Create scheduled WEEKLY and MONTHLY Alerts in bulk. 
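One note on the rows collected above: `document_content` must hold plain, JSON-serializable data, which is why each hit is deep-copied and its `child_docs` are rebuilt from the nested `_source` AttrDicts before `to_dict()` is called. A small data-only sketch of that flattening (the hit shape is illustrative; real hits come from the ES sweep query and carry many more fields):

    import copy

    # Stand-in for an elasticsearch_dsl Hit already converted to a dict.
    hit = {
        "docket_id": 101,
        "caseName": "SUBPOENAS SERVED CASE",
        "child_docs": [{"_source": {"description": "Motion to File"}}],
    }
    hit_copy = copy.deepcopy(hit)
    hit_copy["child_docs"] = [
        child["_source"] for child in hit_copy["child_docs"]
    ]
    # hit_copy is now safe to store in ScheduledAlertHit.document_content;
    # the bulk_create below then persists all collected rows in one query.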
+ if scheduled_hits_to_create: + ScheduledAlertHit.objects.bulk_create(scheduled_hits_to_create) class Command(VerboseCommand): @@ -201,4 +284,5 @@ def handle(self, *args, **options): index_daily_recap_documents() query_and_send_alerts(Alert.REAL_TIME) query_and_send_alerts(Alert.DAILY) - query_and_schedule_wly_and_mly_alerts() + query_and_schedule_alerts(Alert.WEEKLY) + query_and_schedule_alerts(Alert.MONTHLY) diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index d910c67272..d8e203edee 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -11,7 +11,7 @@ from lxml import html from cl.alerts.factories import AlertFactory -from cl.alerts.models import SEARCH_TYPES, Alert +from cl.alerts.models import SEARCH_TYPES, Alert, ScheduledAlertHit from cl.alerts.utils import query_includes_rd_field, recap_document_hl_matched from cl.api.factories import WebhookFactory from cl.api.models import WebhookEvent, WebhookEventType @@ -350,7 +350,7 @@ def test_filter_recap_alerts_to_send(self) -> None: AlertFactory( user=self.user_profile_2.user, rate=Alert.REAL_TIME, - name="Test RT Opinion Alert", + name="Test RT RECAP Alert", query='q="401 Civil"', ) AlertFactory( @@ -944,3 +944,58 @@ def test_multiple_alerts_email_hits_limit_per_alert(self) -> None: txt_email, msg="RECAPDocument wasn't found in the email content.", ) + + def test_schedule_wly_and_mly_recap_alerts(self) -> None: + """Test Weekly and Monthly RECAP Search Alerts are scheduled daily + before being sent later. + """ + + docket_only_alert = AlertFactory( + user=self.user_profile.user, + rate=Alert.WEEKLY, + name="Test Alert Docket Only", + query='q="401 Civil"&type=r', + ) + recap_only_alert = AlertFactory( + user=self.user_profile.user, + rate=Alert.MONTHLY, + name="Test Alert RECAP Only Docket Entry", + query=f"q=docket_entry_id:{self.de.pk}&type=r", + ) + cross_object_alert_with_hl = AlertFactory( + user=self.user_profile.user, + rate=Alert.WEEKLY, + name="Test Alert Cross-object", + query=f'q="401 Civil" id:{self.rd.pk}&type=r', + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts") + + self.assertEqual( + len(mail.outbox), 0, msg="Outgoing emails don't match." + ) + schedule_alerts = ScheduledAlertHit.objects.all() + self.assertEqual(schedule_alerts.count(), 3) + + # Assert webhooks. + webhook_events = WebhookEvent.objects.all().values_list( + "content", flat=True + ) + self.assertEqual(len(webhook_events), 3) + + # Send Weekly alerts and check assertions. + call_command("cl_send_scheduled_alerts", rate=Alert.WEEKLY) + self.assertEqual( + len(mail.outbox), 1, msg="Outgoing emails don't match." + ) + + # Send Monthly alerts and check assertions. + call_command("cl_send_scheduled_alerts", rate=Alert.MONTHLY) + self.assertEqual( + len(mail.outbox), 2, msg="Outgoing emails don't match." 
+ ) From 4a128bf1a3800df58b9db2f4f13765e0685a318e Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Mon, 1 Jul 2024 20:25:54 -0600 Subject: [PATCH 10/33] fix(alerts): Copy documents from the main index to the sweep index using the Re Index API --- .../commands/cl_send_recap_alerts.py | 237 ++++++++++++++++-- cl/alerts/tests/tests_recap_alerts.py | 198 +++++++++++++-- cl/search/documents.py | 25 +- cl/search/es_indices.py | 9 - .../cl_index_parent_and_child_docs.py | 3 - cl/search/tasks.py | 18 +- 6 files changed, 414 insertions(+), 76 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index a00d4cf148..ec88eed5cc 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -1,12 +1,17 @@ import copy import datetime +import time import traceback +from typing import Any +import pytz from asgiref.sync import async_to_sync from django.contrib.auth.models import User from django.http import QueryDict -from django.utils.timezone import now +from django.utils import timezone +from elasticsearch import Elasticsearch from elasticsearch.exceptions import RequestError, TransportError +from elasticsearch_dsl import connections from elasticsearch_dsl.response import Hit from redis import Redis @@ -24,7 +29,7 @@ from cl.lib.command_utils import VerboseCommand, logger from cl.lib.elasticsearch_utils import do_es_sweep_alert_query from cl.lib.redis_utils import get_redis_interface -from cl.search.documents import DocketSweepDocument +from cl.search.documents import DocketDocument, DocketSweepDocument from cl.search.exception import ( BadProximityQuery, UnbalancedParenthesesQuery, @@ -35,9 +40,197 @@ from cl.users.models import UserProfile -def index_daily_recap_documents(): - # TODO implement - pass +def get_task_status(task_id: str, es: Elasticsearch) -> dict[str, Any]: + """Fetch the status of a task from Elasticsearch. + + :param task_id: The ID of the task to fetch the status for. + :param es: The Elasticsearch client instance. + :return: The status of the task if successful, or an empty dictionary if + an error occurs. + """ + try: + return es.tasks.get(task_id=task_id) + except ( + TransportError, + ConnectionError, + RequestError, + ) as e: + logger.error("Error getting sweep alert index task status: %s", e) + return {} + + +def index_daily_recap_documents( + r: Redis, source_index: str, target_index: str, testing: bool = False +) -> int: + """Index Dockets added/modified during the day and all their RECAPDocuments + and RECAPDocuments added/modified during the day and their parent Dockets. + It uses the ES re_index API, + + :param r: Redis client instance. + :param source_index: The source Elasticsearch index from which documents + will be queried. + :param target_index: The target Elasticsearch index to which documents will + be re-indexed. + :param testing: Boolean flag for testing mode. + :return: The total number of documents re-indexed. + """ + + if not r.exists("alert_sweep:query_date"): + # In case of a failure, store the date when alerts should be queried in + # Redis, so the command can be resumed. + local_now = timezone.localtime().replace(tzinfo=None) + local_midnight = local_now.replace( + hour=0, minute=0, second=0, microsecond=0 + ) + r.set("alert_sweep:query_date", local_midnight.isoformat()) + + else: + # If "alert_sweep:query_date" already exists get it from Redis. 
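+        # (Re-reading the stored date lets a resumed run keep querying the
+        # original window instead of shifting to the current day.)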
+ local_midnight_str: str = str(r.get("alert_sweep:query_date")) + local_midnight = datetime.datetime.fromisoformat(local_midnight_str) + + es = connections.get_connection() + # Convert the local (PDT) midnight time to UTC + local_timezone = pytz.timezone(timezone.get_current_timezone_name()) + local_midnight_localized = local_timezone.localize(local_midnight) + local_midnight_utc = local_midnight_localized.astimezone(pytz.utc) + next_day_utc = local_midnight_utc + datetime.timedelta(days=1) + + today_datetime_iso = local_midnight_utc.isoformat().replace("+00:00", "Z") + next_day_utc_iso = next_day_utc.isoformat().replace("+00:00", "Z") + + # Re Index API query. + query = { + "bool": { + "should": [ + # Dockets added/modified today + { + "bool": { + "must": [ + { + "range": { + "timestamp": { + "gte": today_datetime_iso, + "lt": next_day_utc_iso, + } + } + }, + {"term": {"docket_child": "docket"}}, + ] + } + }, + # RECAPDocuments with parents added/modified today + { + "has_parent": { + "parent_type": "docket", + "query": { + "range": { + "timestamp": { + "gte": today_datetime_iso, + "lt": next_day_utc_iso, + } + } + }, + } + }, + # RECAPDocuments added/modified today + { + "bool": { + "must": [ + { + "range": { + "timestamp": { + "gte": today_datetime_iso, + "lt": next_day_utc_iso, + } + } + }, + {"term": {"docket_child": "recap_document"}}, + ] + } + }, + # Dockets that are parents of RECAPDocuments added/modified today + { + "has_child": { + "type": "recap_document", + "query": { + "range": { + "timestamp": { + "gte": today_datetime_iso, + "lt": next_day_utc_iso, + } + } + }, + } + }, + ] + } + } + + if not r.exists("alert_sweep:task_id"): + # In case of a failure, store the task_id in Redis so the command + # can be resumed. + response = es.reindex( + source={"index": source_index, "query": query}, + dest={"index": target_index}, + wait_for_completion=False, + refresh=True, + ) + # Store the task ID in Redis + task_id = response["task"] + r.set("alert_sweep:task_id", task_id) + else: + task_id = r.get("alert_sweep:task_id") + + estimated_time_remaining = 0.1 if testing else 60 + time.sleep(estimated_time_remaining) + task_info = get_task_status(task_id, es) + if task_info: + status = task_info["task"]["status"] + created = status["created"] + total = status["total"] + else: + task_info["completed"] = False + created = 0 + total = 0 + + iterations_count = 0 + while not task_info["completed"]: + logger.info( + f"Task progress: {created}/{total} documents. Estimated time to" + f" finish: {estimated_time_remaining}." 
+ ) + task_info = get_task_status(task_id, es) + time.sleep(estimated_time_remaining) + if task_info and not task_info["completed"]: + status = task_info["task"]["status"] + start_time_millis = task_info["task"]["start_time_in_millis"] + start_time = datetime.datetime.fromtimestamp( + start_time_millis / 1000.0 + ) + created = status["created"] + total = status["total"] + if total and created: + estimated_time_remaining = datetime.timedelta( + seconds=( + (datetime.datetime.now() - start_time).total_seconds() + / created + ) + * (total - created) + ).total_seconds() + if not task_info: + iterations_count += 1 + if iterations_count > 10: + logger.error( + "Re_index alert sweep index task has failed: %s/%s", + created, + total, + ) + break + + r.delete("alert_sweep:query_date") + r.delete("alert_sweep:task_id") + return total def should_docket_hit_be_included( @@ -56,7 +249,7 @@ def should_docket_hit_be_included( return False date_modified = docket.date_modified.date() if not has_document_alert_hit_been_triggered(r, alert_id, "d", docket_id): - if date_modified == now().date(): + if date_modified == timezone.now().date(): return True return False @@ -176,8 +369,7 @@ def send_search_alert_webhooks( ) -def query_and_send_alerts(rate: str) -> None: - r = get_redis_interface("CACHE") +def query_and_send_alerts(r: Redis, rate: str) -> None: alert_users: UserProfile.user = User.objects.filter( alerts__rate=rate ).distinct() @@ -212,7 +404,7 @@ def query_and_send_alerts(rate: str) -> None: ] ) alert.query_run = search_params.urlencode() # type: ignore - alert.date_last_hit = now() + alert.date_last_hit = timezone.now() alert.save() # Send webhooks @@ -230,8 +422,7 @@ def query_and_send_alerts(rate: str) -> None: logger.info(f"Sent {alerts_sent_count} {rate} email alerts.") -def query_and_schedule_alerts(rate: str): - r = get_redis_interface("CACHE") +def query_and_schedule_alerts(r: Redis, rate: str): alert_users = User.objects.filter(alerts__rate=rate).distinct() for user in alert_users: alerts = user.alerts.filter(rate=rate, alert_type=SEARCH_TYPES.RECAP) @@ -279,10 +470,24 @@ def query_and_schedule_alerts(rate: str): class Command(VerboseCommand): help = "Send RECAP Search Alerts." 
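+
+    # index_daily_recap_documents stores "alert_sweep:query_date" and
+    # "alert_sweep:task_id" in Redis, so an interrupted run of this command
+    # can resume the same day's sweep instead of starting over.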
+ def add_arguments(self, parser): + parser.add_argument( + "--testing-mode", + action="store_true", + help="Use this flag for testing purposes.", + ) + def handle(self, *args, **options): super().handle(*args, **options) - index_daily_recap_documents() - query_and_send_alerts(Alert.REAL_TIME) - query_and_send_alerts(Alert.DAILY) - query_and_schedule_alerts(Alert.WEEKLY) - query_and_schedule_alerts(Alert.MONTHLY) + testing_mode = options.get("testing_mode", False) + r = get_redis_interface("CACHE") + index_daily_recap_documents( + r, + DocketDocument._index._name, + DocketSweepDocument._index._name, + testing=testing_mode, + ) + query_and_send_alerts(r, Alert.REAL_TIME) + query_and_send_alerts(r, Alert.DAILY) + query_and_schedule_alerts(r, Alert.WEEKLY) + query_and_schedule_alerts(r, Alert.MONTHLY) diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index d8e203edee..80cd0889be 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -8,17 +8,22 @@ from django.test.utils import override_settings from django.utils.html import strip_tags from django.utils.timezone import now +from elasticsearch_dsl import Q from lxml import html from cl.alerts.factories import AlertFactory +from cl.alerts.management.commands.cl_send_recap_alerts import ( + index_daily_recap_documents, +) from cl.alerts.models import SEARCH_TYPES, Alert, ScheduledAlertHit from cl.alerts.utils import query_includes_rd_field, recap_document_hl_matched from cl.api.factories import WebhookFactory from cl.api.models import WebhookEvent, WebhookEventType from cl.donate.models import NeonMembership from cl.lib.elasticsearch_utils import do_es_sweep_alert_query +from cl.lib.redis_utils import get_redis_interface from cl.lib.test_helpers import RECAPSearchTestCase -from cl.search.documents import DocketSweepDocument +from cl.search.documents import DocketDocument, DocketSweepDocument from cl.search.factories import ( DocketEntryWithParentsFactory, DocketFactory, @@ -50,7 +55,6 @@ def setUpTestData(cls): queue="celery", pk_offset=0, testing_mode=True, - sweep_index=True, ) cls.user_profile = UserProfileWithParentsFactory() @@ -69,6 +73,10 @@ def setUpTestData(cls): enabled=True, ) + def setUp(self): + DocketSweepDocument._index.delete(ignore=404) + DocketSweepDocument.init() + @staticmethod def get_html_content_from_email(email_content): html_content = None @@ -366,7 +374,7 @@ def test_filter_recap_alerts_to_send(self) -> None: 200, mock_raw=True ), ), time_machine.travel(self.mock_date, tick=False): - call_command("cl_send_recap_alerts") + call_command("cl_send_recap_alerts", testing_mode=True) # Only the RECAP RT alert for a member and the RECAP DLY alert are sent. self.assertEqual( @@ -378,6 +386,152 @@ def test_filter_recap_alerts_to_send(self) -> None: html_content = self.get_html_content_from_email(mail.outbox[1]) self.assertIn(dly_recap_alert.name, html_content) + def test_index_daily_recap_documents(self) -> None: + """Test index_daily_recap_documents method over different documents + conditions. 
+ """ + r = get_redis_interface("CACHE") + recap_search = DocketDocument.search() + recap_dockets = recap_search.query(Q("match", docket_child="docket")) + self.assertEqual(recap_dockets.count(), 2) + + recap_documents = recap_search.query( + Q("match", docket_child="recap_document") + ) + self.assertEqual(recap_documents.count(), 3) + + sweep_search = DocketSweepDocument.search() + self.assertEqual( + sweep_search.count(), + 0, + msg="Wrong number of documents in the sweep index.", + ) + + # Index documents based Dockets changed today + all their + # RECAPDocuments indexed the same day. + with time_machine.travel(self.mock_date, tick=False): + documents_indexed = index_daily_recap_documents( + r, + DocketDocument._index._name, + DocketSweepDocument._index._name, + testing=True, + ) + self.assertEqual( + documents_indexed, 5, msg="Wrong number of documents indexed." + ) + + sweep_search = DocketSweepDocument.search() + dockets_sweep = sweep_search.query(Q("match", docket_child="docket")) + self.assertEqual(dockets_sweep.count(), 2) + + documents_sweep = sweep_search.query( + Q("match", docket_child="recap_document") + ) + self.assertEqual(documents_sweep.count(), 3) + + # Index Docket changed today + their RECAPDocuments indexed on + # previous days + with time_machine.travel(self.mock_date, tick=False): + docket = DocketFactory( + court=self.court, + case_name="SUBPOENAS SERVED CASE", + docket_number="1:21-bk-1234", + source=Docket.RECAP, + ) + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + ) + + # Its related RD is ingested two days before. + two_days_before = now() - datetime.timedelta(days=2) + mock_two_days_before = two_days_before.replace(hour=5) + with time_machine.travel(mock_two_days_before, tick=False): + alert_de = DocketEntryWithParentsFactory( + docket=docket, + entry_number=1, + date_filed=datetime.date(2024, 8, 19), + description="MOTION for Leave to File Amicus Curiae Lorem Served", + ) + rd = RECAPDocumentFactory( + docket_entry=alert_de, + description="Motion to File", + document_number="1", + is_available=True, + ) + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + ) + + # Run the indexer. + with time_machine.travel(self.mock_date, tick=False): + documents_indexed = index_daily_recap_documents( + r, + DocketDocument._index._name, + DocketSweepDocument._index._name, + testing=True, + ) + self.assertEqual( + documents_indexed, 7, msg="Wrong number of documents indexed." + ) + + # Index a RECAPDocument changed today including its parent Docket + # indexed on previous days. + with time_machine.travel(mock_two_days_before, tick=False): + docket_2 = DocketFactory( + court=self.court, + case_name="SUBPOENAS SERVED CASE OFF", + docket_number="1:21-bk-1250", + source=Docket.RECAP, + ) + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + ) + + # Its related RD is ingested today. 
+ with time_machine.travel(self.mock_date, tick=False): + alert_de_2 = DocketEntryWithParentsFactory( + docket=docket_2, + entry_number=1, + date_filed=datetime.date(2024, 8, 19), + description="MOTION for Leave to File Amicus Curiae Lorem Served", + ) + rd_2 = RECAPDocumentFactory( + docket_entry=alert_de_2, + description="Motion to File Lorem", + document_number="2", + ) + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + ) + + # Run the indexer. + with time_machine.travel(self.mock_date, tick=False): + documents_indexed = index_daily_recap_documents( + r, + DocketDocument._index._name, + DocketSweepDocument._index._name, + testing=True, + ) + self.assertEqual( + documents_indexed, 9, msg="Wrong number of documents indexed." + ) + def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: """Test RECAP alerts can be properly filtered out according to their query and hits matched conditions. @@ -410,7 +564,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: 200, mock_raw=True ), ), time_machine.travel(self.mock_date, tick=False): - call_command("cl_send_recap_alerts") + call_command("cl_send_recap_alerts", testing_mode=True) self.assertEqual( len(mail.outbox), 1, msg="Outgoing emails don't match." @@ -473,14 +627,13 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: pacer_doc_id="018036652436", plain_text="plain text for 018036652436", ) - call_command( - "cl_index_parent_and_child_docs", - search_type=SEARCH_TYPES.RECAP, - queue="celery", - pk_offset=0, - testing_mode=True, - sweep_index=True, - ) + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + ) with mock.patch( "cl.api.webhooks.requests.post", @@ -488,7 +641,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: 200, mock_raw=True ), ), time_machine.travel(self.mock_date, tick=False): - call_command("cl_send_recap_alerts") + call_command("cl_send_recap_alerts", testing_mode=True) # The RD ingestion's shouldn't match the docket-only alert. self.assertEqual( len(mail.outbox), 1, msg="Outgoing emails don't match." @@ -507,7 +660,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: 200, mock_raw=True ), ), time_machine.travel(self.mock_date, tick=False): - call_command("cl_send_recap_alerts") + call_command("cl_send_recap_alerts", testing_mode=True) # 1 New alert should be triggered. self.assertEqual( len(mail.outbox), 2, msg="Outgoing emails don't match." @@ -541,7 +694,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: 200, mock_raw=True ), ), time_machine.travel(self.mock_date, tick=False): - call_command("cl_send_recap_alerts") + call_command("cl_send_recap_alerts", testing_mode=True) # No new alert should be triggered. self.assertEqual( len(mail.outbox), 2, msg="Outgoing emails don't match." @@ -564,7 +717,6 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: queue="celery", pk_offset=0, testing_mode=True, - sweep_index=True, ) with mock.patch( @@ -573,7 +725,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: 200, mock_raw=True ), ), time_machine.travel(self.mock_date, tick=False): - call_command("cl_send_recap_alerts") + call_command("cl_send_recap_alerts", testing_mode=True) # A new alert should be triggered containing only the new RD created. 
self.assertEqual( @@ -602,7 +754,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: 200, mock_raw=True ), ), time_machine.travel(self.mock_date, tick=False): - call_command("cl_send_recap_alerts") + call_command("cl_send_recap_alerts", testing_mode=True) # A new alert should be triggered containing two RDs (rd and rd_2) self.assertEqual( @@ -636,7 +788,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: 200, mock_raw=True ), ), time_machine.travel(self.mock_date, tick=False): - call_command("cl_send_recap_alerts") + call_command("cl_send_recap_alerts", testing_mode=True) # A new alert should be triggered containing one RD (rd_2) self.assertEqual( @@ -687,7 +839,6 @@ def test_limit_alert_case_child_hits(self) -> None: queue="celery", pk_offset=0, testing_mode=True, - sweep_index=True, ) recap_only_alert = AlertFactory( user=self.user_profile.user, @@ -701,7 +852,7 @@ def test_limit_alert_case_child_hits(self) -> None: 200, mock_raw=True ), ), time_machine.travel(self.mock_date, tick=False): - call_command("cl_send_recap_alerts") + call_command("cl_send_recap_alerts", testing_mode=True) self.assertEqual( len(mail.outbox), 1, msg="Outgoing emails don't match." @@ -820,7 +971,6 @@ def test_multiple_alerts_email_hits_limit_per_alert(self) -> None: queue="celery", pk_offset=0, testing_mode=True, - sweep_index=True, ) with mock.patch( "cl.api.webhooks.requests.post", @@ -828,7 +978,7 @@ def test_multiple_alerts_email_hits_limit_per_alert(self) -> None: 200, mock_raw=True ), ), time_machine.travel(self.mock_date, tick=False): - call_command("cl_send_recap_alerts") + call_command("cl_send_recap_alerts", testing_mode=True) self.assertEqual( len(mail.outbox), 2, msg="Outgoing emails don't match." @@ -974,7 +1124,7 @@ def test_schedule_wly_and_mly_recap_alerts(self) -> None: 200, mock_raw=True ), ), time_machine.travel(self.mock_date, tick=False): - call_command("cl_send_recap_alerts") + call_command("cl_send_recap_alerts", testing_mode=True) self.assertEqual( len(mail.outbox), 0, msg="Outgoing emails don't match." 
diff --git a/cl/search/documents.py b/cl/search/documents.py index d64f4eb724..85c082ab25 100644 --- a/cl/search/documents.py +++ b/cl/search/documents.py @@ -1,5 +1,6 @@ from datetime import datetime +from django.conf import settings from django.http import QueryDict from django.utils.html import escape, strip_tags from django_elasticsearch_dsl import Document, fields @@ -29,7 +30,6 @@ parenthetical_group_index, people_db_index, recap_index, - recap_sweep_index, ) from cl.search.forms import SearchForm from cl.search.models import ( @@ -1829,17 +1829,22 @@ def prepare_cluster_child(self, instance): return "opinion_cluster" -@recap_sweep_index.document class DocketSweepDocument(DocketDocument): - - class Django: - model = Docket - ignore_signals = True + class Index: + name = "recap_sweep" + settings = { + "number_of_shards": settings.ELASTICSEARCH_RECAP_NUMBER_OF_SHARDS, + "number_of_replicas": settings.ELASTICSEARCH_RECAP_NUMBER_OF_REPLICAS, + "analysis": settings.ELASTICSEARCH_DSL["analysis"], + } -@recap_sweep_index.document class ESRECAPSweepDocument(ESRECAPDocument): - class Django: - model = RECAPDocument - ignore_signals = True + class Index: + name = "recap_sweep" + settings = { + "number_of_shards": settings.ELASTICSEARCH_RECAP_NUMBER_OF_SHARDS, + "number_of_replicas": settings.ELASTICSEARCH_RECAP_NUMBER_OF_REPLICAS, + "analysis": settings.ELASTICSEARCH_DSL["analysis"], + } diff --git a/cl/search/es_indices.py b/cl/search/es_indices.py index bf129f0704..717a6abee9 100644 --- a/cl/search/es_indices.py +++ b/cl/search/es_indices.py @@ -53,12 +53,3 @@ number_of_replicas=settings.ELASTICSEARCH_OPINION_NUMBER_OF_REPLICAS, analysis=settings.ELASTICSEARCH_DSL["analysis"], ) - - -# Define RECAP Nested elasticsearch index -recap_sweep_index = Index("recap_sweep") -recap_sweep_index.settings( - number_of_shards=settings.ELASTICSEARCH_RECAP_NUMBER_OF_SHARDS, - number_of_replicas=settings.ELASTICSEARCH_RECAP_NUMBER_OF_REPLICAS, - analysis=settings.ELASTICSEARCH_DSL["analysis"], -) diff --git a/cl/search/management/commands/cl_index_parent_and_child_docs.py b/cl/search/management/commands/cl_index_parent_and_child_docs.py index c4edec4ba0..57cdf390fc 100644 --- a/cl/search/management/commands/cl_index_parent_and_child_docs.py +++ b/cl/search/management/commands/cl_index_parent_and_child_docs.py @@ -480,7 +480,6 @@ def process_queryset( pk_offset = self.options["pk_offset"] document_type = self.options.get("document_type", None) missing = self.options.get("missing", False) - sweep_index = self.options.get("sweep_index", False) fields_map = {} if event_doc_type == EventTable.DOCKET: fields_map = recap_document_field_mapping["save"][Docket][ @@ -535,7 +534,6 @@ def process_queryset( chunk, search_type, testing_mode=testing_mode, - sweep_index=sweep_index, ).set(queue=queue).apply_async() case "index_parent_or_child_docs": @@ -544,7 +542,6 @@ def process_queryset( search_type, document_type, testing_mode=testing_mode, - sweep_index=sweep_index, ).set(queue=queue).apply_async() case "remove_parent_and_child_docs_by_query": remove_parent_and_child_docs_by_query.si( diff --git a/cl/search/tasks.py b/cl/search/tasks.py index 38bb8cdbe1..5039613578 100644 --- a/cl/search/tasks.py +++ b/cl/search/tasks.py @@ -968,7 +968,6 @@ def index_parent_and_child_docs( instance_ids: list[int], search_type: str, testing_mode: bool = False, - sweep_index: bool = False, ) -> None: """Index parent and child documents in Elasticsearch. 
@@ -988,12 +987,8 @@ def index_parent_and_child_docs( child_es_document = PositionDocument child_id_property = "POSITION" case SEARCH_TYPES.RECAP: - parent_es_document = ( - DocketSweepDocument if sweep_index else DocketDocument - ) - child_es_document = ( - ESRECAPSweepDocument if sweep_index else ESRECAPDocument - ) + parent_es_document = DocketDocument + child_es_document = ESRECAPDocument child_id_property = "RECAP" case SEARCH_TYPES.OPINION: parent_es_document = OpinionClusterDocument @@ -1078,7 +1073,6 @@ def index_parent_or_child_docs( search_type: str, document_type: str | None, testing_mode: bool = False, - sweep_index: bool = False, ) -> None: """Index parent or child documents in Elasticsearch. @@ -1097,12 +1091,8 @@ def index_parent_or_child_docs( child_instances = QuerySet() match search_type: case SEARCH_TYPES.RECAP: - parent_es_document = ( - DocketSweepDocument if sweep_index else DocketDocument - ) - child_es_document = ( - ESRECAPSweepDocument if sweep_index else ESRECAPDocument - ) + parent_es_document = DocketDocument + child_es_document = ESRECAPDocument child_id_property = "RECAP" if document_type == "parent": parent_instances = Docket.objects.filter(pk__in=instance_ids) From 3a4a456259c9ec4cb6286232cd3d75e544163d67 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Tue, 2 Jul 2024 10:16:23 -0600 Subject: [PATCH 11/33] fix(alerts): Fixed RECAPSweepDocument index mapping - Enabled RECAP Search alerts UI behind a waffle. - Added alert frequency estimation for RECAP --- .../commands/cl_send_recap_alerts.py | 6 +- cl/alerts/tests/tests_recap_alerts.py | 95 +++++++++++++++---- cl/api/urls.py | 2 +- cl/api/views.py | 17 +++- cl/custom_filters/templatetags/extras.py | 26 ++++- cl/lib/elasticsearch_utils.py | 2 +- cl/search/documents.py | 13 +-- cl/search/tasks.py | 2 - cl/search/templates/search.html | 7 +- 9 files changed, 127 insertions(+), 43 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index ec88eed5cc..864dddbf51 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -29,7 +29,7 @@ from cl.lib.command_utils import VerboseCommand, logger from cl.lib.elasticsearch_utils import do_es_sweep_alert_query from cl.lib.redis_utils import get_redis_interface -from cl.search.documents import DocketDocument, DocketSweepDocument +from cl.search.documents import DocketDocument, RECAPSweepDocument from cl.search.exception import ( BadProximityQuery, UnbalancedParenthesesQuery, @@ -286,7 +286,7 @@ def query_alerts( search_params: QueryDict, ) -> tuple[list[Hit] | None, int | None]: try: - search_query = DocketSweepDocument.search() + search_query = RECAPSweepDocument.search() return do_es_sweep_alert_query( search_query, search_params, @@ -484,7 +484,7 @@ def handle(self, *args, **options): index_daily_recap_documents( r, DocketDocument._index._name, - DocketSweepDocument._index._name, + RECAPSweepDocument._index._name, testing=testing_mode, ) query_and_send_alerts(r, Alert.REAL_TIME) diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index 80cd0889be..59b427841c 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -6,6 +6,7 @@ from django.core import mail from django.core.management import call_command from django.test.utils import override_settings +from django.urls import reverse from django.utils.html import strip_tags from 
django.utils.timezone import now from elasticsearch_dsl import Q @@ -23,7 +24,7 @@ from cl.lib.elasticsearch_utils import do_es_sweep_alert_query from cl.lib.redis_utils import get_redis_interface from cl.lib.test_helpers import RECAPSearchTestCase -from cl.search.documents import DocketDocument, DocketSweepDocument +from cl.search.documents import DocketDocument, RECAPSweepDocument from cl.search.factories import ( DocketEntryWithParentsFactory, DocketFactory, @@ -74,8 +75,8 @@ def setUpTestData(cls): ) def setUp(self): - DocketSweepDocument._index.delete(ignore=404) - DocketSweepDocument.init() + RECAPSweepDocument._index.delete(ignore=404) + RECAPSweepDocument.init() @staticmethod def get_html_content_from_email(email_content): @@ -239,12 +240,23 @@ def _count_webhook_hits_and_child_hits( async def test_recap_document_hl_matched(self) -> None: """Test recap_document_hl_matched method that determines weather a hit contains RECAPDocument HL fields.""" + + # Index base document factories. + r = get_redis_interface("CACHE") + with time_machine.travel(self.mock_date, tick=False): + index_daily_recap_documents( + r, + DocketDocument._index._name, + RECAPSweepDocument._index._name, + testing=True, + ) + # Docket-only query search_params = { "type": SEARCH_TYPES.RECAP, "q": '"401 Civil"', } - search_query = DocketSweepDocument.search() + search_query = RECAPSweepDocument.search() results, total_hits = await sync_to_async(do_es_sweep_alert_query)( search_query, search_params, @@ -259,7 +271,7 @@ async def test_recap_document_hl_matched(self) -> None: "type": SEARCH_TYPES.RECAP, "q": '"Mauris iaculis, leo sit amet hendrerit vehicula"', } - search_query = DocketSweepDocument.search() + search_query = RECAPSweepDocument.search() results, total_hits = await sync_to_async(do_es_sweep_alert_query)( search_query, search_params, @@ -274,7 +286,7 @@ async def test_recap_document_hl_matched(self) -> None: "type": SEARCH_TYPES.RECAP, "q": "SUBPOENAS SERVED OFF Mauris iaculis", } - search_query = DocketSweepDocument.search() + search_query = RECAPSweepDocument.search() results, total_hits = await sync_to_async(do_es_sweep_alert_query)( search_query, search_params, @@ -400,7 +412,7 @@ def test_index_daily_recap_documents(self) -> None: ) self.assertEqual(recap_documents.count(), 3) - sweep_search = DocketSweepDocument.search() + sweep_search = RECAPSweepDocument.search() self.assertEqual( sweep_search.count(), 0, @@ -413,14 +425,14 @@ def test_index_daily_recap_documents(self) -> None: documents_indexed = index_daily_recap_documents( r, DocketDocument._index._name, - DocketSweepDocument._index._name, + RECAPSweepDocument._index._name, testing=True, ) self.assertEqual( documents_indexed, 5, msg="Wrong number of documents indexed." 
) - sweep_search = DocketSweepDocument.search() + sweep_search = RECAPSweepDocument.search() dockets_sweep = sweep_search.query(Q("match", docket_child="docket")) self.assertEqual(dockets_sweep.count(), 2) @@ -475,7 +487,7 @@ def test_index_daily_recap_documents(self) -> None: documents_indexed = index_daily_recap_documents( r, DocketDocument._index._name, - DocketSweepDocument._index._name, + RECAPSweepDocument._index._name, testing=True, ) self.assertEqual( @@ -525,7 +537,7 @@ def test_index_daily_recap_documents(self) -> None: documents_indexed = index_daily_recap_documents( r, DocketDocument._index._name, - DocketSweepDocument._index._name, + RECAPSweepDocument._index._name, testing=True, ) self.assertEqual( @@ -807,6 +819,8 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: self.assertIn(cross_object_alert.name, txt_email) self.assertIn(rd_2.description, txt_email) + docket.delete() + def test_limit_alert_case_child_hits(self) -> None: """Test limit case child hits up to 5 and display the "View additional results for this Case" button. @@ -833,13 +847,13 @@ def test_limit_alert_case_child_hits(self) -> None: # included in the case. rd_descriptions.append(rd.description) - call_command( - "cl_index_parent_and_child_docs", - search_type=SEARCH_TYPES.RECAP, - queue="celery", - pk_offset=0, - testing_mode=True, - ) + call_command( + "cl_index_parent_and_child_docs", + search_type=SEARCH_TYPES.RECAP, + queue="celery", + pk_offset=0, + testing_mode=True, + ) recap_only_alert = AlertFactory( user=self.user_profile.user, rate=Alert.REAL_TIME, @@ -905,14 +919,16 @@ def test_multiple_alerts_email_hits_limit_per_alert(self) -> None: source=Docket.RECAP, cause="410 Civil", ) + dockets_created = [] for i in range(3): - DocketFactory( + docket_created = DocketFactory( court=self.court, case_name=f"SUBPOENAS SERVED CASE {i}", docket_number=f"1:21-bk-123{i}", source=Docket.RECAP, cause="410 Civil", ) + dockets_created.append(docket_created) alert_de = DocketEntryWithParentsFactory( docket=docket, @@ -1095,6 +1111,10 @@ def test_multiple_alerts_email_hits_limit_per_alert(self) -> None: msg="RECAPDocument wasn't found in the email content.", ) + docket.delete() + for d in dockets_created: + d.delete() + def test_schedule_wly_and_mly_recap_alerts(self) -> None: """Test Weekly and Monthly RECAP Search Alerts are scheduled daily before being sent later. @@ -1149,3 +1169,40 @@ def test_schedule_wly_and_mly_recap_alerts(self) -> None: self.assertEqual( len(mail.outbox), 2, msg="Outgoing emails don't match." 
) + + def test_alert_frequency_estimation(self): + """Test alert frequency ES API endpoint for RECAP Alerts.""" + + search_params = { + "type": SEARCH_TYPES.RECAP, + "q": "Frequency Test RECAP", + } + r = self.client.get( + reverse( + "alert_frequency", kwargs={"version": "4", "day_count": "100"} + ), + search_params, + ) + self.assertEqual(r.json()["count"], 0) + + mock_date = now().replace(day=1, hour=5) + with time_machine.travel( + mock_date, tick=False + ), self.captureOnCommitCallbacks(execute=True): + docket = DocketFactory( + court=self.court, + case_name="Frequency Test RECAP", + docket_number="1:21-bk-1240", + source=Docket.RECAP, + date_filed=now().date(), + ) + + r = self.client.get( + reverse( + "alert_frequency", kwargs={"version": "4", "day_count": "100"} + ), + search_params, + ) + self.assertEqual(r.json()["count"], 1) + + docket.delete() diff --git a/cl/api/urls.py b/cl/api/urls.py index 7413a287b4..ea8f0c67aa 100644 --- a/cl/api/urls.py +++ b/cl/api/urls.py @@ -319,7 +319,7 @@ name="coverage_data_opinions", ), re_path( - r"^api/rest/v(?P[123])/alert-frequency/(?P\d+)/$", + r"^api/rest/v(?P[1234])/alert-frequency/(?P\d+)/$", views.get_result_count, name="alert_frequency", ), diff --git a/cl/api/views.py b/cl/api/views.py index 86941007b0..1d95e93410 100644 --- a/cl/api/views.py +++ b/cl/api/views.py @@ -20,7 +20,11 @@ build_coverage_query, get_solr_interface, ) -from cl.search.documents import AudioDocument, OpinionClusterDocument +from cl.search.documents import ( + AudioDocument, + DocketDocument, + OpinionClusterDocument, +) from cl.search.forms import SearchForm from cl.search.models import SEARCH_TYPES, Citation, Court, OpinionCluster from cl.simple_pages.coverage_utils import build_chart_data @@ -271,7 +275,10 @@ async def get_result_count(request, version, day_count): es_flag_for_o = await sync_to_async(waffle.flag_is_active)( request, "o-es-active" ) - is_es_form = es_flag_for_oa or es_flag_for_o + es_flag_for_r = await sync_to_async(waffle.flag_is_active)( + request, "recap-alerts-active" + ) + is_es_form = es_flag_for_oa or es_flag_for_o or es_flag_for_r search_form = await sync_to_async(SearchForm)( request.GET.copy(), is_es_form=is_es_form ) @@ -296,6 +303,12 @@ async def get_result_count(request, version, day_count): total_query_results = await sync_to_async( do_es_alert_estimation_query )(search_query, cd, day_count) + case SEARCH_TYPES.RECAP if es_flag_for_r: + # Elasticsearch version for RECAP + search_query = DocketDocument.search() + total_query_results = await sync_to_async( + do_es_alert_estimation_query + )(search_query, cd, day_count) case _: @sync_to_async diff --git a/cl/custom_filters/templatetags/extras.py b/cl/custom_filters/templatetags/extras.py index 53cc1a2b71..4ce97f3e63 100644 --- a/cl/custom_filters/templatetags/extras.py +++ b/cl/custom_filters/templatetags/extras.py @@ -2,9 +2,11 @@ import re import urllib.parse +import waffle from django import template from django.core.exceptions import ValidationError from django.template import Context +from django.template.context import RequestContext from django.utils.formats import date_format from django.utils.html import format_html from django.utils.http import urlencode @@ -12,7 +14,7 @@ from elasticsearch_dsl import AttrDict, AttrList from cl.search.constants import ALERTS_HL_TAG, SEARCH_HL_TAG -from cl.search.models import Docket, DocketEntry +from cl.search.models import SEARCH_TYPES, Docket, DocketEntry register = template.Library() @@ -259,3 +261,25 @@ def extract_q_value(query: str) -> 
str:
     parsed_query = urllib.parse.parse_qs(query)
     return parsed_query.get("q", [""])[0]
+
+
+@register.simple_tag(takes_context=True)
+def alerts_supported(context: RequestContext, search_type: str) -> bool:
+    """Determine if search alerts are supported based on the search type and flag
+    status.
+
+    :param context: The template context, which includes the request, required
+    for the waffle flag.
+    :param search_type: The type of search being performed.
+    :return: True if alerts are supported, False otherwise.
+    """
+
+    request = context["request"]
+    return (
+        search_type == SEARCH_TYPES.OPINION
+        or search_type == SEARCH_TYPES.ORAL_ARGUMENT
+        or (
+            search_type == SEARCH_TYPES.RECAP
+            and waffle.flag_is_active(request, "recap-alerts-active")
+        )
+    )
diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py
index b51d149ff6..5f24193886 100644
--- a/cl/lib/elasticsearch_utils.py
+++ b/cl/lib/elasticsearch_utils.py
@@ -3009,7 +3009,7 @@ def do_es_alert_estimation_query(
     """
 
     match cd["type"]:
-        case SEARCH_TYPES.OPINION:
+        case SEARCH_TYPES.OPINION | SEARCH_TYPES.RECAP:
             after_field = "filed_after"
             before_field = "filed_before"
         case SEARCH_TYPES.ORAL_ARGUMENT:
diff --git a/cl/search/documents.py b/cl/search/documents.py
index 85c082ab25..378dbb9477 100644
--- a/cl/search/documents.py
+++ b/cl/search/documents.py
@@ -1829,18 +1829,7 @@ def prepare_cluster_child(self, instance):
         return "opinion_cluster"
 
 
-class DocketSweepDocument(DocketDocument):
-    class Index:
-        name = "recap_sweep"
-        settings = {
-            "number_of_shards": settings.ELASTICSEARCH_RECAP_NUMBER_OF_SHARDS,
-            "number_of_replicas": settings.ELASTICSEARCH_RECAP_NUMBER_OF_REPLICAS,
-            "analysis": settings.ELASTICSEARCH_DSL["analysis"],
-        }
-
-
-class ESRECAPSweepDocument(ESRECAPDocument):
-
+class RECAPSweepDocument(DocketDocument, ESRECAPDocument):
     class Index:
         name = "recap_sweep"
         settings = {
diff --git a/cl/search/tasks.py b/cl/search/tasks.py
index 5039613578..df7d337f26 100644
--- a/cl/search/tasks.py
+++ b/cl/search/tasks.py
@@ -40,9 +40,7 @@
     ES_CHILD_ID,
     AudioDocument,
     DocketDocument,
-    DocketSweepDocument,
     ESRECAPDocument,
-    ESRECAPSweepDocument,
     OpinionClusterDocument,
     OpinionDocument,
     PersonDocument,
diff --git a/cl/search/templates/search.html b/cl/search/templates/search.html
index 05cc09a60c..a67dc6f271 100644
--- a/cl/search/templates/search.html
+++ b/cl/search/templates/search.html
@@ -1,6 +1,7 @@
 {% extends 'base.html' %}
 {% load humanize %}
 {% load text_filters %}
+{% load extras %}
 {% load static %}
 {% load waffle_tags %}
 
@@ -177,7 +178,8 @@ </div>
- {% if search_form.type.value == SEARCH_TYPES.OPINION or search_form.type.value == SEARCH_TYPES.ORAL_ARGUMENT %} + {% alerts_supported search_form.type.value as search_alerts_supported %} + {% if search_alerts_supported %} {% include "includes/alert_modal.html" %} {% endif %}
- {% if search_form.type.value == SEARCH_TYPES.OPINION or search_form.type.value == SEARCH_TYPES.ORAL_ARGUMENT %} + {% alerts_supported search_form.type.value as search_alerts_supported %} + {% if search_alerts_supported %} {% if not error and get_string %} Date: Tue, 2 Jul 2024 19:41:51 -0600 Subject: [PATCH 12/33] fix(alerts): Tweak RECAP Alert estimation query to consider both Dockets + RD hits - Fixed RECAP MLY and WLY scheduled alerts content. --- .../commands/cl_send_scheduled_alerts.py | 4 +- cl/alerts/tasks.py | 4 +- cl/alerts/templates/alert_email_es.html | 2 +- cl/alerts/templates/alert_email_es.txt | 2 +- cl/alerts/tests/tests_recap_alerts.py | 123 ++++++++++++++---- cl/custom_filters/templatetags/extras.py | 22 +++- cl/lib/elasticsearch_utils.py | 54 +++++++- .../templates/feeds/solr_desc_template.html | 2 +- .../templates/includes/pa_search_result.html | 4 +- .../templates/includes/search_result.html | 4 +- 10 files changed, 183 insertions(+), 38 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_scheduled_alerts.py b/cl/alerts/management/commands/cl_send_scheduled_alerts.py index 8fefd675f3..e0b23c8420 100644 --- a/cl/alerts/management/commands/cl_send_scheduled_alerts.py +++ b/cl/alerts/management/commands/cl_send_scheduled_alerts.py @@ -99,7 +99,9 @@ def query_and_send_alerts_by_rate(rate: str) -> None: ) ) if hits: - send_search_alert_emails.delay([(user_id, hits)]) + send_search_alert_emails.delay( + [(user_id, hits)], scheduled_alert=True + ) alerts_sent_count += 1 # Update Alert's date_last_hit in bulk. diff --git a/cl/alerts/tasks.py b/cl/alerts/tasks.py index 037fe22b4c..885ac9b413 100644 --- a/cl/alerts/tasks.py +++ b/cl/alerts/tasks.py @@ -467,7 +467,8 @@ def send_webhook_alert_hits( @app.task(ignore_result=True) def send_search_alert_emails( - email_alerts_to_send: list[tuple[int, list[SearchAlertHitType]]] + email_alerts_to_send: list[tuple[int, list[SearchAlertHitType]]], + scheduled_alert: bool = False, ) -> None: """Send search alert emails for multiple users. @@ -491,6 +492,7 @@ def send_search_alert_emails( context = { "hits": hits, "hits_limit": settings.SCHEDULED_ALERT_HITS_LIMIT, + "scheduled_alert": scheduled_alert, } headers = {} query_string = "" diff --git a/cl/alerts/templates/alert_email_es.html b/cl/alerts/templates/alert_email_es.html index 804f33a0bb..2b15d540bd 100644 --- a/cl/alerts/templates/alert_email_es.html +++ b/cl/alerts/templates/alert_email_es.html @@ -53,7 +53,7 @@

{% if doc.short_description %}{{ doc.short_description|render_string_or_list|safe }} — {% endif %}Document #{% if doc.document_number %}{{ doc.document_number }}{% endif %}{% if doc.attachment_number %}, Attachment #{{ doc.attachment_number }}{% endif %} diff --git a/cl/alerts/templates/alert_email_es.txt b/cl/alerts/templates/alert_email_es.txt index f4aa763cec..b836b10caa 100644 --- a/cl/alerts/templates/alert_email_es.txt +++ b/cl/alerts/templates/alert_email_es.txt @@ -19,7 +19,7 @@ Disable this Alert (one click): https://www.courtlistener.com{% url "disable_ale {% if type == 'oa' %}{% if result.dateArgued %}Date Argued: {{ result.dateArgued|date:"F jS, Y" }}{% else %}Date Argued: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %} | Duration: {{ result.duration|naturalduration }}{% if result.judge %} | Judge: {{ result.judge|render_string_or_list|safe|striptags|underscore_to_space }}{% endif %}{% endif %} {% if type == 'o' or type == 'oa' %}{% if result|get_highlight:"text" %}...{{ result|get_highlight:"text"|safe|striptags|underscore_to_space|compress_whitespace }}...{% endif %}{% endif %} {% if type == 'r' %}{% if result.dateFiled %}Date Filed: {{ result.dateFiled|date:"F jS, Y" }}{% else %}Date Filed: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %} -{% for doc in result.child_docs %}{% with doc=doc|get_attrdict:"_source" %} - {% if doc.short_description %}{{ doc.short_description|render_string_or_list|safe|striptags }} - {% endif %}Document #{% if doc.document_number %}{{ doc.document_number }}{% endif %}{% if doc.attachment_number %}, Attachment #{{ doc.attachment_number }}{% endif %} +{% for doc in result.child_docs %}{% with doc=doc|get_es_doc_content:scheduled_alert %} - {% if doc.short_description %}{{ doc.short_description|render_string_or_list|safe|striptags }} - {% endif %}Document #{% if doc.document_number %}{{ doc.document_number }}{% endif %}{% if doc.attachment_number %}, Attachment #{{ doc.attachment_number }}{% endif %} {% if doc.description %}Description: {{ doc.description|render_string_or_list|safe|striptags }}{% endif %} {% if doc.plain_text %}{% contains_highlights doc.plain_text.0 True as highlighted %}{% if highlighted %}...{% endif %}{{ doc.plain_text|render_string_or_list|safe|striptags|underscore_to_space }}...{% endif %} View this document on our site: https://www.courtlistener.com{% if doc.absolute_url %}{{ doc.absolute_url }}{% else %}{{ result.docket_absolute_url }}#minute-entry-{{ doc.docket_entry_id }}{% endif %} diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index 59b427841c..b9ea921510 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -544,6 +544,9 @@ def test_index_daily_recap_documents(self) -> None: documents_indexed, 9, msg="Wrong number of documents indexed." ) + docket.delete() + docket_2.delete() + def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: """Test RECAP alerts can be properly filtered out according to their query and hits matched conditions. @@ -712,24 +715,25 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: len(mail.outbox), 2, msg="Outgoing emails don't match." ) - # Create a new RD for the same DocketEntry to confirm this new RD is - # properly included in the alert email. 
-        rd_2 = RECAPDocumentFactory(
-            docket_entry=alert_de,
-            description="Motion to File 2",
-            document_number="2",
-            is_available=True,
-            page_count=3,
-            pacer_doc_id="018036652436",
-            plain_text="plain text for 018036652436",
-        )
-        call_command(
-            "cl_index_parent_and_child_docs",
-            search_type=SEARCH_TYPES.RECAP,
-            queue="celery",
-            pk_offset=0,
-            testing_mode=True,
-        )
+        with time_machine.travel(mock_date, tick=False):
+            # Create a new RD for the same DocketEntry to confirm this new RD is
+            # properly included in the alert email.
+            rd_2 = RECAPDocumentFactory(
+                docket_entry=alert_de,
+                description="Motion to File 2",
+                document_number="2",
+                is_available=True,
+                page_count=3,
+                pacer_doc_id="018036652436",
+                plain_text="plain text for 018036652436",
+            )
+            call_command(
+                "cl_index_parent_and_child_docs",
+                search_type=SEARCH_TYPES.RECAP,
+                queue="celery",
+                pk_offset=0,
+                testing_mode=True,
+            )
 
         with mock.patch(
             "cl.api.webhooks.requests.post",
@@ -906,6 +910,8 @@ def test_limit_alert_case_child_hits(self) -> None:
 
         self.assertIn("View Additional Results for this Case", txt_email)
 
+        alert_de.delete()
+
     @override_settings(SCHEDULED_ALERT_HITS_LIMIT=3)
     def test_multiple_alerts_email_hits_limit_per_alert(self) -> None:
         """Test multiple alerts can be grouped in an email and hits within an
@@ -1146,29 +1152,77 @@ def test_schedule_wly_and_mly_recap_alerts(self) -> None:
         ), time_machine.travel(self.mock_date, tick=False):
             call_command("cl_send_recap_alerts", testing_mode=True)
 
+        # Weekly and monthly alerts are not sent right away but are scheduled as
+        # ScheduledAlertHit to be sent by the cl_send_scheduled_alerts command.
         self.assertEqual(
             len(mail.outbox), 0, msg="Outgoing emails don't match."
         )
         schedule_alerts = ScheduledAlertHit.objects.all()
         self.assertEqual(schedule_alerts.count(), 3)
 
-        # Assert webhooks.
+        # Webhooks are sent immediately as hits are matched.
         webhook_events = WebhookEvent.objects.all().values_list(
             "content", flat=True
         )
         self.assertEqual(len(webhook_events), 3)
 
-        # Send Weekly alerts and check assertions.
+        # Send scheduled Weekly alerts and check assertions.
         call_command("cl_send_scheduled_alerts", rate=Alert.WEEKLY)
         self.assertEqual(
             len(mail.outbox), 1, msg="Outgoing emails don't match."
         )
+        # Assert docket-only alert.
+        html_content = self.get_html_content_from_email(mail.outbox[0])
+        self._count_alert_hits_and_child_hits(
+            html_content,
+            docket_only_alert.name,
+            1,
+            self.de.docket.case_name,
+            0,
+        )
+        self._count_alert_hits_and_child_hits(
+            html_content,
+            cross_object_alert_with_hl.name,
+            1,
+            self.de.docket.case_name,
+            1,
+        )
+        self._assert_child_hits_content(
+            html_content,
+            cross_object_alert_with_hl.name,
+            self.de.docket.case_name,
+            [self.rd.description],
+        )
+        # Assert email text version:
+        txt_email = mail.outbox[0].body
+        self.assertIn(docket_only_alert.name, txt_email)
+        self.assertIn(cross_object_alert_with_hl.name, txt_email)
+        self.assertIn(self.rd.description, txt_email)
 
-        # Send Monthly alerts and check assertions.
+        # Send scheduled Monthly alerts and check assertions.
         call_command("cl_send_scheduled_alerts", rate=Alert.MONTHLY)
         self.assertEqual(
             len(mail.outbox), 2, msg="Outgoing emails don't match."
) + html_content = self.get_html_content_from_email(mail.outbox[1]) + self._count_alert_hits_and_child_hits( + html_content, + recap_only_alert.name, + 1, + self.de.docket.case_name, + 2, + ) + self._assert_child_hits_content( + html_content, + recap_only_alert.name, + self.de.docket.case_name, + [self.rd.description, self.rd_att.description], + ) + # Assert email text version: + txt_email = mail.outbox[1].body + self.assertIn(recap_only_alert.name, txt_email) + self.assertIn(self.rd.description, txt_email) + self.assertIn(self.rd_att.description, txt_email) def test_alert_frequency_estimation(self): """Test alert frequency ES API endpoint for RECAP Alerts.""" @@ -1189,6 +1243,7 @@ def test_alert_frequency_estimation(self): with time_machine.travel( mock_date, tick=False ), self.captureOnCommitCallbacks(execute=True): + # Docket filed today. docket = DocketFactory( court=self.court, case_name="Frequency Test RECAP", @@ -1197,12 +1252,36 @@ def test_alert_frequency_estimation(self): date_filed=now().date(), ) + # RECAPDocument filed today that belongs to a docket filed outside + # the estimation range. + date_outside_range = now() - datetime.timedelta(days=101) + alert_de = DocketEntryWithParentsFactory( + docket=DocketFactory( + court=self.court, + case_name="Frequency Test RECAP", + docket_number="1:21-bk-1245", + source=Docket.RECAP, + date_filed=date_outside_range.date(), + ), + entry_number=1, + date_filed=now().date(), + ) + RECAPDocumentFactory( + docket_entry=alert_de, + description="Frequency Test RECAP", + document_number="1", + pacer_doc_id="018036652450", + ) + r = self.client.get( reverse( "alert_frequency", kwargs={"version": "4", "day_count": "100"} ), search_params, ) - self.assertEqual(r.json()["count"], 1) + # 2 expected hits in the last 100 days. One docket filed today + one + # RECAPDocument filed today. + self.assertEqual(r.json()["count"], 2) docket.delete() + alert_de.docket.delete() diff --git a/cl/custom_filters/templatetags/extras.py b/cl/custom_filters/templatetags/extras.py index 4ce97f3e63..b67396e296 100644 --- a/cl/custom_filters/templatetags/extras.py +++ b/cl/custom_filters/templatetags/extras.py @@ -132,11 +132,25 @@ def random_int(a: int, b: int) -> int: @register.filter -def get_attrdict(mapping, key): - """Emulates the dictionary get for AttrDict objects. Useful when keys - have spaces or other punctuation.""" +def get_es_doc_content( + mapping: AttrDict | dict, scheduled_alert: bool = False +) -> AttrDict | dict | str: + """ + Returns the ES document content placed in the "_source" field if the + document is an AttrDict, or just returns the content if it's not necessary + to extract from "_source" such as in scheduled alerts where the content is + a dict. + + :param mapping: The AttrDict or dict instance to extract the content from. + :param scheduled_alert: A boolean indicating if the content belongs to a + scheduled alert where the content is already in place. + :return: The ES document content. 
+ """ + + if scheduled_alert: + return mapping try: - return mapping[key] + return mapping["_source"] except KeyError: return "" diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 5f24193886..af7118fd04 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -135,8 +135,8 @@ def build_numeric_range_query( def build_daterange_query( field: str, - before: datetime.date, - after: datetime.date, + before: datetime.date | str, + after: datetime.date | str, relation: Literal["INTERSECTS", "CONTAINS", "WITHIN", None] = None, ) -> list[Range]: """Given field name and date range limits returns ElasticSearch range query or None @@ -1991,7 +1991,7 @@ def fetch_es_results( return [], 0, error, None, None -def build_has_child_filters(cd: CleanData) -> list[QueryString]: +def build_has_child_filters(cd: CleanData) -> list[QueryString | Range]: """Builds Elasticsearch 'has_child' filters based on the given child type and CleanData. @@ -2027,6 +2027,8 @@ def build_has_child_filters(cd: CleanData) -> list[QueryString]: description = cd.get("description", "") document_number = cd.get("document_number", "") attachment_number = cd.get("attachment_number", "") + entry_date_filed_after = cd.get("entry_date_filed_after", "") + entry_date_filed_before = cd.get("entry_date_filed_before", "") if available_only: queries_list.extend( @@ -2045,6 +2047,14 @@ def build_has_child_filters(cd: CleanData) -> list[QueryString]: queries_list.extend( build_term_query("attachment_number", attachment_number) ) + if entry_date_filed_after or entry_date_filed_before: + queries_list.extend( + build_daterange_query( + "entry_date_filed", + entry_date_filed_before, + entry_date_filed_after, + ) + ) return queries_list @@ -3024,6 +3034,44 @@ def do_es_alert_estimation_query( cd[before_field] = None estimation_query, _ = build_es_base_query(search_query, cd) + if cd["type"] == SEARCH_TYPES.RECAP: + # The RECAP estimation query consists of two requests: one to estimate + # Docket hits and one to estimate RECAPDocument hits. + del cd[after_field] + del cd[before_field] + cd["entry_date_filed_after"] = ( + datetime.date.today() - datetime.timedelta(days=int(day_count)) + ) + cd["entry_date_filed_before"] = None + + main_doc_count_query = clean_count_query(estimation_query) + main_doc_count_query = main_doc_count_query.extra( + size=0, track_total_hits=True + ) + + # Perform the two queries in a single request. + multi_search = MultiSearch() + multi_search = multi_search.add(main_doc_count_query) + + # Build RECAPDocuments count query. 
+ _, join_query = build_es_base_query(search_query, cd) + child_docs_count_query = build_child_docs_query(join_query, cd) + child_total = 0 + if child_docs_count_query: + child_docs_count_query = search_query.query(child_docs_count_query) + child_total_query = child_docs_count_query.extra( + size=0, track_total_hits=True + ) + multi_search = multi_search.add(child_total_query) + + responses = multi_search.execute() + parent_total = responses[0].hits.total.value + if child_docs_count_query: + child_doc_count_response = responses[1] + child_total = child_doc_count_response.hits.total.value + total_recap_estimation = parent_total + child_total + return total_recap_estimation + return estimation_query.count() diff --git a/cl/search/templates/feeds/solr_desc_template.html b/cl/search/templates/feeds/solr_desc_template.html index ce02928003..479d9fe4f5 100644 --- a/cl/search/templates/feeds/solr_desc_template.html +++ b/cl/search/templates/feeds/solr_desc_template.html @@ -7,7 +7,7 @@ {% else %} {% flag "o-es-active" %} {% if doc0.child_docs %} - {% with doc=doc0.child_docs.0|get_attrdict:"_source" %} + {% with doc=doc0.child_docs.0|get_es_doc_content %}

{{ doc.text|render_string_or_list|safe|truncatewords:"500" }}


{% endwith %} {% else %} diff --git a/cl/search/templates/includes/pa_search_result.html b/cl/search/templates/includes/pa_search_result.html index 822c359307..9c74505e3b 100644 --- a/cl/search/templates/includes/pa_search_result.html +++ b/cl/search/templates/includes/pa_search_result.html @@ -4,7 +4,7 @@ {% load humanize %} {% for result in results.object_list %} - {% with opinion=result.grouped_by_opinion_cluster_id.hits.hits.0|get_attrdict:"_source" %} + {% with opinion=result.grouped_by_opinion_cluster_id.hits.hits.0|get_es_doc_content %}

@@ -30,7 +30,7 @@

{% for parenthetical_group in result.grouped_by_opinion_cluster_id.hits.hits %}
- {% with pa_group=parenthetical_group|get_attrdict:"_source" %} + {% with pa_group=parenthetical_group|get_es_doc_content %}

{{ pa_group.representative_text|safe }} diff --git a/cl/search/templates/includes/search_result.html b/cl/search/templates/includes/search_result.html index 6776c98ad8..b099b772ae 100644 --- a/cl/search/templates/includes/search_result.html +++ b/cl/search/templates/includes/search_result.html @@ -163,7 +163,7 @@

{% endif %} {% for doc in result.child_docs %} - {% with doc=doc|get_attrdict:"_source" %} + {% with doc=doc|get_es_doc_content %}

{% if doc.short_description %}{{ doc.short_description|render_string_or_list|safe }} — {% endif %}Document #{% if doc.document_number %}{{ doc.document_number }}{% endif %}{% if doc.attachment_number %}, Attachment #{{ doc.attachment_number }}{% endif %} @@ -395,7 +395,7 @@

{% if type == SEARCH_TYPES.OPINION or type_override == SEARCH_TYPES.OPINION and simple == False %} {% for doc in result.child_docs %} - {% with doc=doc|get_attrdict:"_source" %} + {% with doc=doc|get_es_doc_content %}
{% if result.child_docs|length > 1 or doc.type != 'combined-opinion' %}

From ebf269d15efde0c94f562aaa059a355a8423e32b Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Tue, 2 Jul 2024 19:55:48 -0600 Subject: [PATCH 13/33] fix(elasticsearch): Fixed build_daterange_query type hint --- cl/lib/elasticsearch_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index af7118fd04..72c982deca 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -151,9 +151,9 @@ def build_daterange_query( params = {} if any([before, after]): - if hasattr(after, "strftime"): + if isinstance(after, datetime.date): params["gte"] = f"{after.isoformat()}T00:00:00Z" - if hasattr(before, "strftime"): + if isinstance(before, datetime.date): params["lte"] = f"{before.isoformat()}T23:59:59Z" if relation is not None: allowed_relations = ["INTERSECTS", "CONTAINS", "WITHIN"] From bffee6d2ad1a724a26b4d78b7c7552aa9df5854c Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Tue, 2 Jul 2024 21:14:53 -0600 Subject: [PATCH 14/33] fix(alerts): Fixed re_index task estimated remaining time compute --- .../commands/cl_send_recap_alerts.py | 53 ++++++++++++++----- cl/alerts/tests/tests_recap_alerts.py | 35 ++++++------ 2 files changed, 59 insertions(+), 29 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index 864dddbf51..d11d6dbce1 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -59,6 +59,37 @@ def get_task_status(task_id: str, es: Elasticsearch) -> dict[str, Any]: return {} +def compute_estimated_remaining_time( + initial_wait: float, start_time_millis: int, created: int, total: int +) -> float: + """Compute the estimated remaining time for the re_index task to complete. + + :param initial_wait: The default wait time in seconds. + :param start_time_millis: The start time in milliseconds epoch. + :param created: The number of items created so far. + :param total: The total number of items to be created. + :return: The estimated remaining time in seconds. If the start time, + created, or total are invalid, the initial default time is returned. + """ + + if start_time_millis is None or not created or not total: + return initial_wait + + start_time = datetime.datetime.fromtimestamp(start_time_millis / 1000.0) + estimated_time_remaining = max( + datetime.timedelta( + seconds=( + (datetime.datetime.now() - start_time).total_seconds() + / created + ) + * (total - created) + ).total_seconds(), + initial_wait, + ) + + return estimated_time_remaining + + def index_daily_recap_documents( r: Redis, source_index: str, target_index: str, testing: bool = False ) -> int: @@ -182,19 +213,24 @@ def index_daily_recap_documents( else: task_id = r.get("alert_sweep:task_id") - estimated_time_remaining = 0.1 if testing else 60 - time.sleep(estimated_time_remaining) + initial_wait = 0.01 if testing else 60.0 + time.sleep(initial_wait) task_info = get_task_status(task_id, es) if task_info: status = task_info["task"]["status"] created = status["created"] total = status["total"] + start_time_millis = task_info["task"]["start_time_in_millis"] else: task_info["completed"] = False created = 0 total = 0 + start_time_millis = None iterations_count = 0 + estimated_time_remaining = compute_estimated_remaining_time( + initial_wait, start_time_millis, created, total + ) while not task_info["completed"]: logger.info( f"Task progress: {created}/{total} documents. 
Estimated time to" @@ -205,19 +241,12 @@ def index_daily_recap_documents( if task_info and not task_info["completed"]: status = task_info["task"]["status"] start_time_millis = task_info["task"]["start_time_in_millis"] - start_time = datetime.datetime.fromtimestamp( - start_time_millis / 1000.0 - ) created = status["created"] total = status["total"] if total and created: - estimated_time_remaining = datetime.timedelta( - seconds=( - (datetime.datetime.now() - start_time).total_seconds() - / created - ) - * (total - created) - ).total_seconds() + estimated_time_remaining = compute_estimated_remaining_time( + initial_wait, start_time_millis, created, total + ) if not task_info: iterations_count += 1 if iterations_count > 10: diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index b9ea921510..78cd52fe8b 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -75,6 +75,9 @@ def setUpTestData(cls): ) def setUp(self): + self.r = get_redis_interface("CACHE") + self.r.delete("alert_sweep:query_date") + self.r.delete("alert_sweep:task_id") RECAPSweepDocument._index.delete(ignore=404) RECAPSweepDocument.init() @@ -242,10 +245,9 @@ async def test_recap_document_hl_matched(self) -> None: contains RECAPDocument HL fields.""" # Index base document factories. - r = get_redis_interface("CACHE") with time_machine.travel(self.mock_date, tick=False): index_daily_recap_documents( - r, + self.r, DocketDocument._index._name, RECAPSweepDocument._index._name, testing=True, @@ -385,7 +387,7 @@ def test_filter_recap_alerts_to_send(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ), time_machine.travel(self.mock_date, tick=False): + ): call_command("cl_send_recap_alerts", testing_mode=True) # Only the RECAP RT alert for a member and the RECAP DLY alert are sent. @@ -402,7 +404,6 @@ def test_index_daily_recap_documents(self) -> None: """Test index_daily_recap_documents method over different documents conditions. """ - r = get_redis_interface("CACHE") recap_search = DocketDocument.search() recap_dockets = recap_search.query(Q("match", docket_child="docket")) self.assertEqual(recap_dockets.count(), 2) @@ -423,7 +424,7 @@ def test_index_daily_recap_documents(self) -> None: # RECAPDocuments indexed the same day. with time_machine.travel(self.mock_date, tick=False): documents_indexed = index_daily_recap_documents( - r, + self.r, DocketDocument._index._name, RECAPSweepDocument._index._name, testing=True, @@ -485,7 +486,7 @@ def test_index_daily_recap_documents(self) -> None: # Run the indexer. with time_machine.travel(self.mock_date, tick=False): documents_indexed = index_daily_recap_documents( - r, + self.r, DocketDocument._index._name, RECAPSweepDocument._index._name, testing=True, @@ -535,7 +536,7 @@ def test_index_daily_recap_documents(self) -> None: # Run the indexer. 
with time_machine.travel(self.mock_date, tick=False): documents_indexed = index_daily_recap_documents( - r, + self.r, DocketDocument._index._name, RECAPSweepDocument._index._name, testing=True, @@ -578,7 +579,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ), time_machine.travel(self.mock_date, tick=False): + ): call_command("cl_send_recap_alerts", testing_mode=True) self.assertEqual( @@ -655,7 +656,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ), time_machine.travel(self.mock_date, tick=False): + ): call_command("cl_send_recap_alerts", testing_mode=True) # The RD ingestion's shouldn't match the docket-only alert. self.assertEqual( @@ -674,7 +675,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ), time_machine.travel(self.mock_date, tick=False): + ): call_command("cl_send_recap_alerts", testing_mode=True) # 1 New alert should be triggered. self.assertEqual( @@ -708,7 +709,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ), time_machine.travel(self.mock_date, tick=False): + ): call_command("cl_send_recap_alerts", testing_mode=True) # No new alert should be triggered. self.assertEqual( @@ -740,7 +741,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ), time_machine.travel(self.mock_date, tick=False): + ): call_command("cl_send_recap_alerts", testing_mode=True) # A new alert should be triggered containing only the new RD created. 
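A note on the computation this patch introduces: the remaining time is a linear extrapolation, elapsed seconds per created document, multiplied by the documents still to be created, and floored at the initial wait. A minimal standalone sketch of the same arithmetic (stdlib only; the function name here is illustrative, not part of the patch):

import datetime


def estimate_remaining_seconds(
    initial_wait: float,
    start_time_millis: int | None,
    created: int,
    total: int,
) -> float:
    # Without a start time or any progress, fall back to the default wait.
    if start_time_millis is None or not created or not total:
        return initial_wait
    start_time = datetime.datetime.fromtimestamp(start_time_millis / 1000.0)
    elapsed = (datetime.datetime.now() - start_time).total_seconds()
    # Linear extrapolation: seconds per document so far, times documents left.
    return max(elapsed / created * (total - created), initial_wait)


# Example: 5,000 of 20,000 documents created in 120 seconds gives
# 120 / 5000 * 15000 = 360 seconds remaining.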
@@ -769,7 +770,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ), time_machine.travel(self.mock_date, tick=False): + ): call_command("cl_send_recap_alerts", testing_mode=True) # A new alert should be triggered containing two RDs (rd and rd_2) @@ -803,7 +804,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ), time_machine.travel(self.mock_date, tick=False): + ): call_command("cl_send_recap_alerts", testing_mode=True) # A new alert should be triggered containing one RD (rd_2) @@ -869,7 +870,7 @@ def test_limit_alert_case_child_hits(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ), time_machine.travel(self.mock_date, tick=False): + ): call_command("cl_send_recap_alerts", testing_mode=True) self.assertEqual( @@ -999,7 +1000,7 @@ def test_multiple_alerts_email_hits_limit_per_alert(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ), time_machine.travel(self.mock_date, tick=False): + ): call_command("cl_send_recap_alerts", testing_mode=True) self.assertEqual( @@ -1149,7 +1150,7 @@ def test_schedule_wly_and_mly_recap_alerts(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ), time_machine.travel(self.mock_date, tick=False): + ): call_command("cl_send_recap_alerts", testing_mode=True) # Weekly and monthly alerts are not sent right away but are scheduled as From 847f0fd8e5912ab561a4eb4b74199176c52870bf Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Wed, 3 Jul 2024 11:12:10 -0600 Subject: [PATCH 15/33] fix(alerts): Handle creation and removal of the RECAP alerts sweep index. - Ensure document timestamps get updated on partial updates. --- .../commands/cl_send_recap_alerts.py | 37 +++-- cl/alerts/tests/tests_recap_alerts.py | 126 ++++++++++++++---- cl/search/tasks.py | 14 +- 3 files changed, 144 insertions(+), 33 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index d11d6dbce1..ce5ebc0f1f 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -2,7 +2,7 @@ import datetime import time import traceback -from typing import Any +from typing import Any, Type import pytz from asgiref.sync import async_to_sync @@ -91,21 +91,29 @@ def compute_estimated_remaining_time( def index_daily_recap_documents( - r: Redis, source_index: str, target_index: str, testing: bool = False + r: Redis, + source_index_name: str, + target_index: Type[RECAPSweepDocument], + testing: bool = False, ) -> int: """Index Dockets added/modified during the day and all their RECAPDocuments and RECAPDocuments added/modified during the day and their parent Dockets. It uses the ES re_index API, :param r: Redis client instance. - :param source_index: The source Elasticsearch index from which documents - will be queried. + :param source_index_name: The source Elasticsearch index name from which + documents will be queried. :param target_index: The target Elasticsearch index to which documents will be re-indexed. :param testing: Boolean flag for testing mode. :return: The total number of documents re-indexed. """ + if r.exists("alert_sweep:re_index_completed"): + # The re-indexing has been completed for the day. Abort it and proceed + # with sending alerts. 
+ return 0 + if not r.exists("alert_sweep:query_date"): # In case of a failure, store the date when alerts should be queried in # Redis, so the command can be resumed. @@ -129,7 +137,6 @@ def index_daily_recap_documents( today_datetime_iso = local_midnight_utc.isoformat().replace("+00:00", "Z") next_day_utc_iso = next_day_utc.isoformat().replace("+00:00", "Z") - # Re Index API query. query = { "bool": { @@ -199,11 +206,16 @@ def index_daily_recap_documents( } if not r.exists("alert_sweep:task_id"): + # Remove the index from the previous day and create a new one. + target_index._index.delete(ignore=404) + target_index.init() + target_index_name = target_index._index._name + # In case of a failure, store the task_id in Redis so the command # can be resumed. response = es.reindex( - source={"index": source_index, "query": query}, - dest={"index": target_index}, + source={"index": source_index_name, "query": query}, + dest={"index": target_index_name}, wait_for_completion=False, refresh=True, ) @@ -259,6 +271,8 @@ def index_daily_recap_documents( r.delete("alert_sweep:query_date") r.delete("alert_sweep:task_id") + if not testing: + r.set("alert_sweep:re_index_completed", 1, ex=3600 * 12) return total @@ -497,6 +511,12 @@ def query_and_schedule_alerts(r: Redis, rate: str): class Command(VerboseCommand): + """Query and re-index (into the RECAP sweep index) all the RECAP content + that has changed during the current period, along with their related + documents. Then use the RECAP sweep index to query and send real-time and + daily RECAP alerts. Finally, schedule weekly and monthly RECAP alerts. + """ + help = "Send RECAP Search Alerts." def add_arguments(self, parser): @@ -513,10 +533,11 @@ def handle(self, *args, **options): index_daily_recap_documents( r, DocketDocument._index._name, - RECAPSweepDocument._index._name, + RECAPSweepDocument, testing=testing_mode, ) query_and_send_alerts(r, Alert.REAL_TIME) query_and_send_alerts(r, Alert.DAILY) query_and_schedule_alerts(r, Alert.WEEKLY) query_and_schedule_alerts(r, Alert.MONTHLY) + r.delete("alert_sweep:re_index_completed") diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index 78cd52fe8b..22f647b7d9 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -78,8 +78,6 @@ def setUp(self): self.r = get_redis_interface("CACHE") self.r.delete("alert_sweep:query_date") self.r.delete("alert_sweep:task_id") - RECAPSweepDocument._index.delete(ignore=404) - RECAPSweepDocument.init() @staticmethod def get_html_content_from_email(email_content): @@ -249,7 +247,7 @@ async def test_recap_document_hl_matched(self) -> None: index_daily_recap_documents( self.r, DocketDocument._index._name, - RECAPSweepDocument._index._name, + RECAPSweepDocument, testing=True, ) @@ -404,6 +402,8 @@ def test_index_daily_recap_documents(self) -> None: """Test index_daily_recap_documents method over different documents conditions. 
""" + RECAPSweepDocument._index.delete(ignore=404) + RECAPSweepDocument.init() recap_search = DocketDocument.search() recap_dockets = recap_search.query(Q("match", docket_child="docket")) self.assertEqual(recap_dockets.count(), 2) @@ -426,7 +426,7 @@ def test_index_daily_recap_documents(self) -> None: documents_indexed = index_daily_recap_documents( self.r, DocketDocument._index._name, - RECAPSweepDocument._index._name, + RECAPSweepDocument, testing=True, ) self.assertEqual( @@ -488,7 +488,7 @@ def test_index_daily_recap_documents(self) -> None: documents_indexed = index_daily_recap_documents( self.r, DocketDocument._index._name, - RECAPSweepDocument._index._name, + RECAPSweepDocument, testing=True, ) self.assertEqual( @@ -538,13 +538,96 @@ def test_index_daily_recap_documents(self) -> None: documents_indexed = index_daily_recap_documents( self.r, DocketDocument._index._name, - RECAPSweepDocument._index._name, + RECAPSweepDocument, + testing=True, + ) + self.assertEqual( + documents_indexed, 9, msg="Wrong number of documents indexed." + ) + + # Docket and RD created on previous days, will be used later to confirm + # documents got indexed into the sweep index after partial updates. + three_days_before = now() - datetime.timedelta(days=5) + mock_three_days_before = three_days_before.replace(hour=5) + with time_machine.travel( + mock_three_days_before, tick=False + ), self.captureOnCommitCallbacks(execute=True): + docket_old = DocketFactory( + court=self.court, + case_name="SUBPOENAS SERVED LOREM OFF", + docket_number="1:21-bk-1254", + source=Docket.RECAP, + ) + alert_de_old = DocketEntryWithParentsFactory( + docket=docket_old, + entry_number=1, + date_filed=datetime.date(2024, 8, 19), + description="MOTION for Leave to File Amicus Curiae Lorem Served", + ) + rd_old = RECAPDocumentFactory( + docket_entry=alert_de_old, + description="Motion to File", + document_number="1", + is_available=True, + ) + rd_old_2 = RECAPDocumentFactory( + docket_entry=alert_de_old, + description="Motion to File 2", + document_number="2", + is_available=True, + ) + + # Run the indexer. No new documents re_indexed. + with time_machine.travel(self.mock_date, tick=False): + documents_indexed = index_daily_recap_documents( + self.r, + DocketDocument._index._name, + RECAPSweepDocument, testing=True, ) self.assertEqual( documents_indexed, 9, msg="Wrong number of documents indexed." ) + # Update the documents today: + with time_machine.travel( + self.mock_date, tick=False + ), self.captureOnCommitCallbacks(execute=True): + rd_old_2.document_number = 3 + rd_old_2.save() + + # Run the indexer. No new documents re_indexed. + with time_machine.travel(self.mock_date, tick=False): + documents_indexed = index_daily_recap_documents( + self.r, + DocketDocument._index._name, + RECAPSweepDocument, + testing=True, + ) + self.assertEqual( + documents_indexed, 11, msg="Wrong number of documents indexed." + ) + + # Update the Docket today: + with time_machine.travel( + self.mock_date, tick=False + ), self.captureOnCommitCallbacks(execute=True): + docket_old.case_name = "SUBPOENAS SERVED LOREM OFF UPDATED" + docket_old.save() + + # Run the indexer. No new documents re_indexed. + with time_machine.travel(self.mock_date, tick=False): + documents_indexed = index_daily_recap_documents( + self.r, + DocketDocument._index._name, + RECAPSweepDocument, + testing=True, + ) + self.assertEqual( + documents_indexed, 12, msg="Wrong number of documents indexed." 
+ ) + + docket_old.delete() docket.delete() docket_2.delete() @@ -579,7 +662,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ): + ), time_machine.travel(self.mock_date, tick=False): call_command("cl_send_recap_alerts", testing_mode=True) self.assertEqual( @@ -610,9 +693,8 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: query='q="405 Civil"&type=r', ) # Simulate docket is ingested a day before. - one_day_before = now() - datetime.timedelta(days=1) - mock_date = one_day_before.replace(hour=5) - with time_machine.travel(mock_date, tick=False): + one_day_before = self.mock_date - datetime.timedelta(days=1) + with time_machine.travel(one_day_before, tick=False): docket = DocketFactory( court=self.court, case_name="SUBPOENAS SERVED CASE", @@ -626,8 +708,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: ) # Its related RD is ingested today. - mock_date = now().replace(hour=5) - with time_machine.travel(mock_date, tick=False): + with time_machine.travel(self.mock_date, tick=False): alert_de = DocketEntryWithParentsFactory( docket=docket, entry_number=1, @@ -656,7 +737,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ): + ), time_machine.travel(self.mock_date, tick=False): call_command("cl_send_recap_alerts", testing_mode=True) # The RD ingestion's shouldn't match the docket-only alert. self.assertEqual( @@ -675,7 +756,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ): + ), time_machine.travel(self.mock_date, tick=False): call_command("cl_send_recap_alerts", testing_mode=True) # 1 New alert should be triggered. self.assertEqual( @@ -709,14 +790,14 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ): + ), time_machine.travel(self.mock_date, tick=False): call_command("cl_send_recap_alerts", testing_mode=True) # No new alert should be triggered. self.assertEqual( len(mail.outbox), 2, msg="Outgoing emails don't match." ) - with time_machine.travel(mock_date, tick=False): + with time_machine.travel(self.mock_date, tick=False): # Create a new RD for the same DocketEntry to confirm this new RD is # properly included in the alert email. rd_2 = RECAPDocumentFactory( @@ -741,7 +822,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ): + ), time_machine.travel(self.mock_date, tick=False): call_command("cl_send_recap_alerts", testing_mode=True) # A new alert should be triggered containing only the new RD created. 
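These hunks pin the command run to self.mock_date with time_machine, so the documents indexed by the test and the query window computed by the command agree on the same day. The pattern, reduced to its essentials (the frozen date shown here is illustrative):

import time_machine
from django.core.management import call_command

# Freeze "now" so timestamps written while the command runs fall on the same
# local day as the test fixtures indexed above.
with time_machine.travel("2024-07-03 05:00:00", tick=False):
    call_command("cl_send_recap_alerts", testing_mode=True)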
@@ -770,7 +851,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ): + ), time_machine.travel(self.mock_date, tick=False): call_command("cl_send_recap_alerts", testing_mode=True) # A new alert should be triggered containing two RDs (rd and rd_2) @@ -804,7 +885,7 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: side_effect=lambda *args, **kwargs: MockResponse( 200, mock_raw=True ), - ): + ), time_machine.travel(self.mock_date, tick=False): call_command("cl_send_recap_alerts", testing_mode=True) # A new alert should be triggered containing one RD (rd_2) @@ -831,8 +912,7 @@ def test_limit_alert_case_child_hits(self) -> None: results for this Case" button. """ - mock_date = now().replace(hour=5) - with time_machine.travel(mock_date, tick=False): + with time_machine.travel(self.mock_date, tick=False): alert_de = DocketEntryWithParentsFactory( docket=self.de.docket, entry_number=1, @@ -1239,10 +1319,8 @@ def test_alert_frequency_estimation(self): search_params, ) self.assertEqual(r.json()["count"], 0) - - mock_date = now().replace(day=1, hour=5) with time_machine.travel( - mock_date, tick=False + self.mock_date, tick=False ), self.captureOnCommitCallbacks(execute=True): # Docket filed today. docket = DocketFactory( diff --git a/cl/search/tasks.py b/cl/search/tasks.py index df7d337f26..49115e192a 100644 --- a/cl/search/tasks.py +++ b/cl/search/tasks.py @@ -462,6 +462,13 @@ def document_fields_to_update( continue field_value = prepare_method(main_instance) fields_to_update[field] = field_value + + if fields_to_update: + # If fields to update, append the timestamp to be updated too. + prepare_timestamp = getattr(es_document(), f"prepare_timestamp", None) + if prepare_timestamp: + field_value = prepare_timestamp(main_instance) + fields_to_update["timestamp"] = field_value return fields_to_update @@ -762,9 +769,14 @@ def update_children_docs_by_query( # Build the UpdateByQuery script and execute it script_lines = [] params = {} + if fields_to_update: + # If there are fields to update include the timestamp field too. 
+ fields_to_update.append("timestamp") for field_to_update in fields_to_update: field_list = ( - fields_map[field_to_update] if fields_map else [field_to_update] + ["timestamp"] + if field_to_update == "timestamp" + else fields_map.get(field_to_update, [field_to_update]) ) for field_name in field_list: script_lines.append( From 4b324c9727f9399883dbdb214de8dd7cd3b1a3b9 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Wed, 3 Jul 2024 12:38:28 -0600 Subject: [PATCH 16/33] fix(elasticsearch): Fixed tests related to timestamp updates --- cl/search/tasks.py | 6 +++++- cl/search/tests/tests.py | 13 ------------- cl/search/tests/tests_es_opinion.py | 10 ++++++---- 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/cl/search/tasks.py b/cl/search/tasks.py index 49115e192a..241ec87a2a 100644 --- a/cl/search/tasks.py +++ b/cl/search/tasks.py @@ -776,7 +776,11 @@ def update_children_docs_by_query( field_list = ( ["timestamp"] if field_to_update == "timestamp" - else fields_map.get(field_to_update, [field_to_update]) + else ( + fields_map[field_to_update] + if fields_map + else [field_to_update] + ) ) for field_name in field_list: script_lines.append( diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py index 2dcd3a78ab..510bbd3510 100644 --- a/cl/search/tests/tests.py +++ b/cl/search/tests/tests.py @@ -2612,19 +2612,6 @@ def test_remove_opinions_by_timestamp(self, mock_logging_prefix): testing_mode=True, ) - with self.captureOnCommitCallbacks(execute=True): - # Trigger a change in opinion_1 to confirm the timestamp is not - # updated. - opinion_1.type = Opinion.UNANIMOUS - opinion_1.save() - - # The timestamp in opinion_1 remains the same as it was from 5 days ago - opinion_1_doc = OpinionClusterDocument.get( - ES_CHILD_ID(opinion_1.pk).OPINION - ) - self.assertEqual(opinion_1_doc.type, "unanimous-opinion") - self.assertEqual(opinion_1_doc.timestamp.date(), five_days_ago.date()) - # The timestamp in opinion_2 is updated to 2 days ago. 
opinion_2_doc = OpinionClusterDocument.get( ES_CHILD_ID(opinion_2.pk).OPINION diff --git a/cl/search/tests/tests_es_opinion.py b/cl/search/tests/tests_es_opinion.py index ac8593562d..4e8fafb005 100644 --- a/cl/search/tests/tests_es_opinion.py +++ b/cl/search/tests/tests_es_opinion.py @@ -626,8 +626,9 @@ def test_extract_snippet_from_db_highlight_disabled(self) -> None: prioritizing the different text fields available in the content when highlighting is disabled.""" - with self.captureOnCommitCallbacks(execute=True): - + with time_machine.travel( + self.mock_date, tick=False + ), self.captureOnCommitCallbacks(execute=True): c_2_opinion_1 = OpinionFactory.create( extracted_by_ocr=True, author=self.person_2, @@ -635,7 +636,6 @@ def test_extract_snippet_from_db_highlight_disabled(self) -> None: html_lawbox="html_lawbox & text from DB", cluster=self.opinion_cluster_2, ) - c_2_opinion_2 = OpinionFactory.create( extracted_by_ocr=True, author=self.person_2, @@ -710,7 +710,9 @@ def test_extract_snippet_from_db_highlight_disabled(self) -> None: ) self.assertEqual(expected_text, result_opinion["snippet"]) - with self.captureOnCommitCallbacks(execute=True): + with time_machine.travel( + self.mock_date, tick=False + ), self.captureOnCommitCallbacks(execute=True): c_2_opinion_1.delete() c_2_opinion_2.delete() c_2_opinion_3.delete() From 0d63080ec2a7d5d43cd67399286c3a7091b65d77 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Wed, 3 Jul 2024 19:27:29 -0600 Subject: [PATCH 17/33] fix(alerts): Fix should_docket_hit_be_included date comparison - Fixed email templates - Refactored retrieve_task_info --- .../commands/cl_send_recap_alerts.py | 98 ++++++++++++------- cl/alerts/tasks.py | 2 + cl/alerts/templates/alert_email_es.html | 5 +- cl/alerts/templates/alert_email_es.txt | 4 +- cl/alerts/tests/tests_recap_alerts.py | 2 +- 5 files changed, 71 insertions(+), 40 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index ce5ebc0f1f..e9ec48e2d8 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -27,6 +27,7 @@ from cl.api.models import WebhookEventType from cl.api.tasks import send_es_search_alert_webhook from cl.lib.command_utils import VerboseCommand, logger +from cl.lib.date_time import dt_as_local_date from cl.lib.elasticsearch_utils import do_es_sweep_alert_query from cl.lib.redis_utils import get_redis_interface from cl.search.documents import DocketDocument, RECAPSweepDocument @@ -76,12 +77,10 @@ def compute_estimated_remaining_time( return initial_wait start_time = datetime.datetime.fromtimestamp(start_time_millis / 1000.0) + time_now = datetime.datetime.now() estimated_time_remaining = max( datetime.timedelta( - seconds=( - (datetime.datetime.now() - start_time).total_seconds() - / created - ) + seconds=((time_now - start_time).total_seconds() / created) * (total - created) ).total_seconds(), initial_wait, @@ -90,6 +89,31 @@ def compute_estimated_remaining_time( return estimated_time_remaining +def retrieve_task_info(task_info: dict[str, Any]) -> dict[str, Any]: + """Retrieve task information from the given task dict. + + :param task_info: A dictionary containing the task status information. + :return: A dictionary with the task completion status, created documents + count, total documents count, and the task start time in milliseconds. + Retrieve default values in case task_info is not valid. 
+ """ + + if task_info: + status = task_info["task"]["status"] + return { + "completed": task_info["completed"], + "created": status["created"], + "total": status["total"], + "start_time_millis": task_info["task"]["start_time_in_millis"], + } + return { + "completed": False, + "created": 0, + "total": 0, + "start_time_millis": None, + } + + def index_daily_recap_documents( r: Redis, source_index_name: str, @@ -110,6 +134,9 @@ def index_daily_recap_documents( """ if r.exists("alert_sweep:re_index_completed"): + logger.info( + "The re-index task has been completed and will be omitted." + ) # The re-indexing has been completed for the day. Abort it and proceed # with sending alerts. return 0 @@ -127,6 +154,7 @@ def index_daily_recap_documents( # If "alert_sweep:query_date" already exists get it from Redis. local_midnight_str: str = str(r.get("alert_sweep:query_date")) local_midnight = datetime.datetime.fromisoformat(local_midnight_str) + logger.info(f"Resuming re-indexing process for date: {local_midnight}") es = connections.get_connection() # Convert the local (PDT) midnight time to UTC @@ -222,58 +250,50 @@ def index_daily_recap_documents( # Store the task ID in Redis task_id = response["task"] r.set("alert_sweep:task_id", task_id) + logger.info(f"Re-indexing task scheduled ID: {task_id}") else: task_id = r.get("alert_sweep:task_id") + logger.info(f"Resuming re-index task ID: {task_id}") initial_wait = 0.01 if testing else 60.0 time.sleep(initial_wait) - task_info = get_task_status(task_id, es) - if task_info: - status = task_info["task"]["status"] - created = status["created"] - total = status["total"] - start_time_millis = task_info["task"]["start_time_in_millis"] - else: - task_info["completed"] = False - created = 0 - total = 0 - start_time_millis = None - + get_task_info = retrieve_task_info(get_task_status(task_id, es)) iterations_count = 0 estimated_time_remaining = compute_estimated_remaining_time( - initial_wait, start_time_millis, created, total + initial_wait, + get_task_info["start_time_millis"], + get_task_info["created"], + get_task_info["total"], ) - while not task_info["completed"]: + while not get_task_info["completed"]: logger.info( - f"Task progress: {created}/{total} documents. Estimated time to" - f" finish: {estimated_time_remaining}." + f"Task progress: {get_task_info['created']}/{get_task_info['total']} documents. " + f"Estimated time to finish: {estimated_time_remaining} seconds." 
         )
         task_info = get_task_status(task_id, es)
+        get_task_info = retrieve_task_info(task_info)
         time.sleep(estimated_time_remaining)
-        if task_info and not task_info["completed"]:
-            status = task_info["task"]["status"]
-            start_time_millis = task_info["task"]["start_time_in_millis"]
-            created = status["created"]
-            total = status["total"]
-            if total and created:
-                estimated_time_remaining = compute_estimated_remaining_time(
-                    initial_wait, start_time_millis, created, total
-                )
+        if task_info and not get_task_info["completed"]:
+            estimated_time_remaining = compute_estimated_remaining_time(
+                initial_wait,
+                get_task_info["start_time_millis"],
+                get_task_info["created"],
+                get_task_info["total"],
+            )
         if not task_info:
             iterations_count += 1
             if iterations_count > 10:
                 logger.error(
                     "Re_index alert sweep index task has failed: %s/%s",
-                    created,
-                    total,
+                    get_task_info["created"],
+                    get_task_info["total"],
                 )
                 break
-    r.delete("alert_sweep:query_date")
     r.delete("alert_sweep:task_id")
     if not testing:
         r.set("alert_sweep:re_index_completed", 1, ex=3600 * 12)
-    return total
+    return get_task_info["total"]
 
 
 def should_docket_hit_be_included(
@@ -290,9 +310,16 @@ def should_docket_hit_be_included(
     docket = Docket.objects.filter(id=docket_id).only("date_modified").first()
     if not docket:
         return False
-    date_modified = docket.date_modified.date()
     if not has_document_alert_hit_been_triggered(r, alert_id, "d", docket_id):
-        if date_modified == timezone.now().date():
+        local_midnight_localized = timezone.localtime(
+            timezone.make_aware(
+                datetime.datetime.fromisoformat(
+                    str(r.get("alert_sweep:query_date"))
+                )
+            )
+        )
+        date_modified_localized = dt_as_local_date(docket.date_modified)
+        if date_modified_localized == local_midnight_localized.date():
             return True
     return False
 
@@ -541,3 +568,4 @@ def handle(self, *args, **options):
         query_and_schedule_alerts(r, Alert.WEEKLY)
         query_and_schedule_alerts(r, Alert.MONTHLY)
         r.delete("alert_sweep:re_index_completed")
+        r.delete("alert_sweep:query_date")
diff --git a/cl/alerts/tasks.py b/cl/alerts/tasks.py
index 885ac9b413..f7b004bc54 100644
--- a/cl/alerts/tasks.py
+++ b/cl/alerts/tasks.py
@@ -475,6 +475,8 @@ def send_search_alert_emails(
     :param email_alerts_to_send: A list of two tuples containing the user to
     whom the alerts should be sent. A list of tuples containing the Search
     Alert, (Alert, search type, documents, and number of documents)
+    :param scheduled_alert: A boolean indicating whether this alert has been
+    scheduled.
     :return: None
     """
diff --git a/cl/alerts/templates/alert_email_es.html b/cl/alerts/templates/alert_email_es.html
index 2b15d540bd..dc2f797268 100644
--- a/cl/alerts/templates/alert_email_es.html
+++ b/cl/alerts/templates/alert_email_es.html
@@ -36,7 +36,7 @@

{{ forloop.counter }}. {{ result|get_highlight:"caseName"|safe }} - ({% if result.court_id != 'scotus' %}{{ result|get_highlight:"court_citation_string"|nbsp|safe }} {% endif %}{% if type == 'o' %}{{ result.dateFiled|date:"Y" }}{% elif type == 'oa' %}{{ result.dateArgued|date:"Y" }}{% elif type == 'r' %}{{ result.dateFiled|date:"Y" }}{% endif %}) + ({% if result.court_id != 'scotus' %}{{ result|get_highlight:"court_citation_string"|nbsp|safe }} {% endif %}{% if type == 'o' or type == 'r' %}{{ result.dateFiled|date:"Y" }}{% elif type == 'oa' %}{{ result.dateArgued|date:"Y" }}{% endif %})

{% if type == 'r' %} @@ -68,11 +68,12 @@

View Additional Results for this Case +
{% endif %} {% else %}

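Earlier in this patch, should_docket_hit_be_included switched from comparing against timezone.now().date() to comparing local dates derived from the sweep query date stored in Redis. A rough sketch of that comparison inside a configured Django project; timezone.localdate stands in here for the dt_as_local_date helper the patch imports:

import datetime

from django.utils import timezone


def modified_on_query_date(
    date_modified: datetime.datetime, query_date_iso: str
) -> bool:
    # The sweep query date is stored in Redis as a naive ISO string; make it
    # aware in the default timezone before reducing it to a local date.
    local_midnight = timezone.localtime(
        timezone.make_aware(datetime.datetime.fromisoformat(query_date_iso))
    )
    # Compare local dates on both sides so dockets modified late in the local
    # day do not slip across the UTC date boundary.
    return timezone.localdate(date_modified) == local_midnight.date()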
diff --git a/cl/alerts/templates/alert_email_es.txt b/cl/alerts/templates/alert_email_es.txt index b836b10caa..2b7ec3b569 100644 --- a/cl/alerts/templates/alert_email_es.txt +++ b/cl/alerts/templates/alert_email_es.txt @@ -15,7 +15,7 @@ We have news regarding your alerts at CourtListener.com View Full Results / Edit this Alert: https://www.courtlistener.com/?{{ alert.query_run|safe }}&edit_alert={{ alert.pk }} Disable this Alert (one click): https://www.courtlistener.com{% url "disable_alert" alert.secret_key %}{% endif %} -{{forloop.counter}}. {{ result.caseName|render_string_or_list|safe|striptags }} ({% if result.court_id != 'scotus' %}{{ result.court_citation_string|render_string_or_list|striptags }} {% endif %}{% if type == 'o' %}{{ result.dateFiled|date:"Y" }}{% elif type == 'oa' %}{{ result.dateArgued|date:"Y" }}{% endif %}) +{{forloop.counter}}. {{ result.caseName|render_string_or_list|safe|striptags }} ({% if result.court_id != 'scotus' %}{{ result.court_citation_string|render_string_or_list|striptags }} {% endif %}{% if type == 'o' or type == 'r' %}{{ result.dateFiled|date:"Y" }}{% elif type == 'oa' %}{{ result.dateArgued|date:"Y" }}{% endif %}) {% if type == 'oa' %}{% if result.dateArgued %}Date Argued: {{ result.dateArgued|date:"F jS, Y" }}{% else %}Date Argued: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %} | Duration: {{ result.duration|naturalduration }}{% if result.judge %} | Judge: {{ result.judge|render_string_or_list|safe|striptags|underscore_to_space }}{% endif %}{% endif %} {% if type == 'o' or type == 'oa' %}{% if result|get_highlight:"text" %}...{{ result|get_highlight:"text"|safe|striptags|underscore_to_space|compress_whitespace }}...{% endif %}{% endif %} {% if type == 'r' %}{% if result.dateFiled %}Date Filed: {{ result.dateFiled|date:"F jS, Y" }}{% else %}Date Filed: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %} @@ -24,7 +24,7 @@ Disable this Alert (one click): https://www.courtlistener.com{% url "disable_ale {% if doc.plain_text %}{% contains_highlights doc.plain_text.0 True as highlighted %}{% if highlighted %}...{% endif %}{{ doc.plain_text|render_string_or_list|safe|striptags|underscore_to_space }}...{% endif %} View this document on our site: https://www.courtlistener.com{% if doc.absolute_url %}{{ doc.absolute_url }}{% else %}{{ result.docket_absolute_url }}#minute-entry-{{ doc.docket_entry_id }}{% endif %} {% endwith %}{% endfor %} -{% if result.child_remaining %}{% extract_q_value alert.query_run as q_value %}View Additional Results for this Case: https://www.courtlistener.com/?type={{ type|urlencode }}&q={% if q_value %}({{ q_value|urlencode }})%20AND%20{% endif %}docket_id%3A{{ result.docket_id|urlencode }}{% endif %} +{% if result.child_docs and result.child_remaining %}{% extract_q_value alert.query_run as q_value %}View Additional Results for this Case: https://www.courtlistener.com/?type={{ type|urlencode }}&q={% if q_value %}({{ q_value|urlencode }})%20AND%20{% endif %}docket_id%3A{{ result.docket_id|urlencode }}{% endif %} {% endif %}~~~~~ - View this item on our site: https://www.courtlistener.com{% if type == 'r' %}{{result.docket_absolute_url}}{% else %}{{result.absolute_url}}{% endif %} {% if result.download_url %} - Download original from the court: {{result.download_url}} diff --git a/cl/alerts/tests/tests_recap_alerts.py 
b/cl/alerts/tests/tests_recap_alerts.py index 22f647b7d9..cf9ea6b145 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -1333,7 +1333,7 @@ def test_alert_frequency_estimation(self): # RECAPDocument filed today that belongs to a docket filed outside # the estimation range. - date_outside_range = now() - datetime.timedelta(days=101) + date_outside_range = now() - datetime.timedelta(days=102) alert_de = DocketEntryWithParentsFactory( docket=DocketFactory( court=self.court, From 5077e01c81318ecf6bb726fc256161f54a636614 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Tue, 9 Jul 2024 20:02:46 -0600 Subject: [PATCH 18/33] fix(alerts): Changed approach to filter out cross-object hits by using extra Docket-only and RD-only queries. --- .../commands/cl_send_recap_alerts.py | 279 +++++++++++++----- .../commands/clean_up_search_alerts.py | 2 +- cl/alerts/tests/tests_recap_alerts.py | 274 +++++++++++++---- cl/alerts/utils.py | 21 -- cl/lib/elasticsearch_utils.py | 176 ++++++----- cl/search/documents.py | 21 +- cl/search/tests/tests_es_oral_arguments.py | 2 +- cl/search/tests/tests_es_person.py | 4 +- 8 files changed, 547 insertions(+), 232 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index e9ec48e2d8..763651c985 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -12,7 +12,8 @@ from elasticsearch import Elasticsearch from elasticsearch.exceptions import RequestError, TransportError from elasticsearch_dsl import connections -from elasticsearch_dsl.response import Hit +from elasticsearch_dsl.response import Hit, Response +from elasticsearch_dsl.utils import AttrList from redis import Redis from cl.alerts.models import Alert, ScheduledAlertHit @@ -21,7 +22,6 @@ add_document_hit_to_alert_set, alert_hits_limit_reached, has_document_alert_hit_been_triggered, - query_includes_rd_field, recap_document_hl_matched, ) from cl.api.models import WebhookEventType @@ -30,7 +30,11 @@ from cl.lib.date_time import dt_as_local_date from cl.lib.elasticsearch_utils import do_es_sweep_alert_query from cl.lib.redis_utils import get_redis_interface -from cl.search.documents import DocketDocument, RECAPSweepDocument +from cl.search.documents import ( + DocketDocument, + ESRECAPSweepDocument, + RECAPSweepDocument, +) from cl.search.exception import ( BadProximityQuery, UnbalancedParenthesesQuery, @@ -117,8 +121,9 @@ def retrieve_task_info(task_info: dict[str, Any]) -> dict[str, Any]: def index_daily_recap_documents( r: Redis, source_index_name: str, - target_index: Type[RECAPSweepDocument], + target_index: Type[RECAPSweepDocument] | Type[ESRECAPSweepDocument], testing: bool = False, + only_rd: bool = False, ) -> int: """Index Dockets added/modified during the day and all their RECAPDocuments and RECAPDocuments added/modified during the day and their parent Dockets. @@ -130,6 +135,8 @@ def index_daily_recap_documents( :param target_index: The target Elasticsearch index to which documents will be re-indexed. :param testing: Boolean flag for testing mode. + :param only_rd: Whether to reindex only RECAPDocuments into the + ESRECAPSweepDocument index. :return: The total number of documents re-indexed. 
""" @@ -166,14 +173,31 @@ def index_daily_recap_documents( today_datetime_iso = local_midnight_utc.isoformat().replace("+00:00", "Z") next_day_utc_iso = next_day_utc.isoformat().replace("+00:00", "Z") # Re Index API query. - query = { - "bool": { - "should": [ - # Dockets added/modified today - { - "bool": { - "must": [ - { + query = ( + { + "bool": { + "should": [ + # Dockets added/modified today + { + "bool": { + "must": [ + { + "range": { + "timestamp": { + "gte": today_datetime_iso, + "lt": next_day_utc_iso, + } + } + }, + {"term": {"docket_child": "docket"}}, + ] + } + }, + # RECAPDocuments with parents added/modified today + { + "has_parent": { + "parent_type": "docket", + "query": { "range": { "timestamp": { "gte": today_datetime_iso, @@ -181,29 +205,29 @@ def index_daily_recap_documents( } } }, - {"term": {"docket_child": "docket"}}, - ] - } - }, - # RECAPDocuments with parents added/modified today - { - "has_parent": { - "parent_type": "docket", - "query": { - "range": { - "timestamp": { - "gte": today_datetime_iso, - "lt": next_day_utc_iso, - } - } - }, - } - }, - # RECAPDocuments added/modified today - { - "bool": { - "must": [ - { + } + }, + # RECAPDocuments added/modified today + { + "bool": { + "must": [ + { + "range": { + "timestamp": { + "gte": today_datetime_iso, + "lt": next_day_utc_iso, + } + } + }, + {"term": {"docket_child": "recap_document"}}, + ] + } + }, + # Dockets that are parents of RECAPDocuments added/modified today + { + "has_child": { + "type": "recap_document", + "query": { "range": { "timestamp": { "gte": today_datetime_iso, @@ -211,27 +235,49 @@ def index_daily_recap_documents( } } }, - {"term": {"docket_child": "recap_document"}}, - ] - } - }, - # Dockets that are parents of RECAPDocuments added/modified today - { - "has_child": { - "type": "recap_document", - "query": { - "range": { - "timestamp": { - "gte": today_datetime_iso, - "lt": next_day_utc_iso, + } + }, + ] + } + } + if not only_rd + else { + "bool": { + "should": [ + # RECAPDocuments with parents added/modified today + { + "has_parent": { + "parent_type": "docket", + "query": { + "range": { + "timestamp": { + "gte": today_datetime_iso, + "lt": next_day_utc_iso, + } } - } - }, - } - }, - ] + }, + } + }, + # RECAPDocuments added/modified today + { + "bool": { + "must": [ + { + "range": { + "timestamp": { + "gte": today_datetime_iso, + "lt": next_day_utc_iso, + } + } + }, + {"term": {"docket_child": "recap_document"}}, + ] + } + }, + ] + } } - } + ) if not r.exists("alert_sweep:task_id"): # Remove the index from the previous day and create a new one. @@ -241,12 +287,39 @@ def index_daily_recap_documents( # In case of a failure, store the task_id in Redis so the command # can be resumed. 
-        response = es.reindex(
-            source={"index": source_index_name, "query": query},
-            dest={"index": target_index_name},
-            wait_for_completion=False,
-            refresh=True,
-        )
+        params = {
+            "source": {"index": source_index_name, "query": query},
+            "dest": {"index": target_index_name},
+            "wait_for_completion": False,
+            "refresh": True,
+        }
+        if only_rd:
+            # Re-index only RECAPDocument fields to the ESRECAPSweepDocument
+            # index.
+            params["script"] = {
+                "source": """
+                def fields = [
+                    'id',
+                    'docket_entry_id',
+                    'description',
+                    'entry_number',
+                    'entry_date_filed',
+                    'short_description',
+                    'document_type',
+                    'document_number',
+                    'pacer_doc_id',
+                    'plain_text',
+                    'attachment_number',
+                    'is_available',
+                    'page_count',
+                    'filepath_local',
+                    'absolute_url',
+                    'cites'
+                ];
+                ctx._source.keySet().retainAll(fields);
+                """
+            }
+        response = es.reindex(**params)
         # Store the task ID in Redis
         task_id = response["task"]
         r.set("alert_sweep:task_id", task_id)
@@ -291,8 +364,6 @@ def index_daily_recap_documents(
                 break
 
     r.delete("alert_sweep:task_id")
-    if not testing:
-        r.set("alert_sweep:re_index_completed", 1, ex=3600 * 12)
     return get_task_info["total"]
@@ -324,12 +395,20 @@ def should_docket_hit_be_included(
     return False
 
 
-def filter_rd_alert_hits(r: Redis, alert_id: int, rd_hits, check_rd_hl=False):
+def filter_rd_alert_hits(
+    r: Redis,
+    alert_id: int,
+    rd_hits: AttrList,
+    rd_ids: list[int],
+    check_rd_hl=False,
+):
     """Filter RECAP document hits based on specified conditions.
 
     :param r: The Redis interface.
     :param alert_id: The ID of the alert.
-    :param rd_hits: A list of RECAP document hits to be processed.
+    :param rd_hits: A list of RECAPDocument hits to be processed.
+    :param rd_ids: A list of RECAPDocument IDs that matched the
+    RECAPDocument-only query.
     :param check_rd_hl: A boolean indicating whether to check if the RECAP
     document hit matched RD HLs.
     :return: A list of RECAP document hits that meet all specified conditions.
@@ -343,7 +422,10 @@ def filter_rd_alert_hits(
             )
         ]
         if check_rd_hl:
-            conditions.append(recap_document_hl_matched(rd_hit))
+            if not recap_document_hl_matched(rd_hit):
+                # The hit didn't match any RECAPDocument HL; include it only
+                # if it matched the RECAPDocument-only query.
+                conditions.append(rd_hit["_source"]["id"] in rd_ids)
         if all(conditions):
             rds_to_send.append(rd_hit)
             add_document_hit_to_alert_set(
@@ -354,11 +436,13 @@
 def query_alerts(
     search_params: QueryDict,
-) -> tuple[list[Hit] | None, int | None]:
+) -> tuple[list[Hit] | None, Response | None, Response | None]:
     try:
         search_query = RECAPSweepDocument.search()
+        child_search_query = ESRECAPSweepDocument.search()
         return do_es_sweep_alert_query(
             search_query,
+            child_search_query,
             search_params,
         )
     except (
@@ -371,35 +455,52 @@ def query_alerts(
     ):
         traceback.print_exc()
         logger.info(f"Search for this alert failed: {search_params}\n")
-        return None, None
+        return None, None, None
 
 
 def process_alert_hits(
-    r: Redis, results: list[Hit], search_params: QueryDict, alert_id: int
+    r: Redis,
+    results: list[Hit],
+    parent_results: Response | None,
+    child_results: Response | None,
+    alert_id: int,
 ) -> list[Hit]:
     """Process alert hits by filtering and prepare the results to send based
     on alert conditions.
 
     :param r: The Redis instance.
     :param results: A list of Hit objects containing search results.
-    :param search_params: Query parameters used for the search.
+ :param parent_results: The ES Response for the docket-only query. + :param child_results: The ES Response for the RECAPDocument-only query. :param alert_id: The ID of the alert being processed. :return: A list of Hit objects that are filtered and prepared to be sent. """ - includes_rd_fields = query_includes_rd_field(search_params) + docket_hits = parent_results.hits if parent_results else [] + docket_ids = [int(d.docket_id) for d in docket_hits] + + rd_hits = child_results.hits if child_results else [] + rd_ids = [int(r.id) for r in rd_hits] results_to_send = [] if len(results) > 0: for hit in results: - if not includes_rd_fields: + if hit.docket_id in docket_ids: # Possible Docket-only alert rds_to_send = filter_rd_alert_hits( - r, alert_id, hit["child_docs"], check_rd_hl=True + r, alert_id, hit["child_docs"], rd_ids, check_rd_hl=True ) if rds_to_send: # Cross-object query hit["child_docs"] = rds_to_send results_to_send.append(hit) + if should_docket_hit_be_included( + r, alert_id, hit.docket_id + ): + add_document_hit_to_alert_set( + r, alert_id, "d", hit.docket_id + ) + + # Docket-only alert elif should_docket_hit_be_included(r, alert_id, hit.docket_id): # Docket-only alert hit["child_docs"] = [] @@ -407,10 +508,11 @@ def process_alert_hits( add_document_hit_to_alert_set( r, alert_id, "d", hit.docket_id ) + else: # RECAP-only alerts or cross-object alerts rds_to_send = filter_rd_alert_hits( - r, alert_id, hit["child_docs"] + r, alert_id, hit["child_docs"], rd_ids ) if rds_to_send: # Cross-object alert @@ -456,13 +558,19 @@ def query_and_send_alerts(r: Redis, rate: str) -> None: alerts_to_update = [] for alert in alerts: search_params = QueryDict(alert.query.encode(), mutable=True) - results, _ = query_alerts(search_params) + results, parent_results, child_results = query_alerts( + search_params + ) if not results: continue alerts_to_update.append(alert.pk) search_type = search_params.get("type", SEARCH_TYPES.RECAP) results_to_send = process_alert_hits( - r, results, search_params, alert.pk + r, + results, + parent_results, + child_results, + alert.pk, ) if results_to_send: hits.append( @@ -500,11 +608,18 @@ def query_and_schedule_alerts(r: Redis, rate: str): scheduled_hits_to_create = [] for alert in alerts: search_params = QueryDict(alert.query.encode(), mutable=True) - results, _ = query_alerts(search_params) + results, parent_results, child_results = query_alerts( + search_params + ) if not results: continue + results_to_send = process_alert_hits( - r, results, search_params, alert.pk + r, + results, + parent_results, + child_results, + alert.pk, ) if results_to_send: for hit in results_to_send: @@ -563,6 +678,16 @@ def handle(self, *args, **options): RECAPSweepDocument, testing=testing_mode, ) + index_daily_recap_documents( + r, + DocketDocument._index._name, + ESRECAPSweepDocument, + testing=testing_mode, + only_rd=True, + ) + if not testing_mode: + r.set("alert_sweep:re_index_completed", 1, ex=3600 * 12) + query_and_send_alerts(r, Alert.REAL_TIME) query_and_send_alerts(r, Alert.DAILY) query_and_schedule_alerts(r, Alert.WEEKLY) diff --git a/cl/alerts/management/commands/clean_up_search_alerts.py b/cl/alerts/management/commands/clean_up_search_alerts.py index b00d7128a3..cf1ceb2f54 100644 --- a/cl/alerts/management/commands/clean_up_search_alerts.py +++ b/cl/alerts/management/commands/clean_up_search_alerts.py @@ -75,7 +75,7 @@ def validate_queries_syntax(options: OptionsType) -> None: if search_form.is_valid(): cd = search_form.cleaned_data try: - s, _ = 
build_es_base_query(search_query, cd) + s, _, _ = build_es_base_query(search_query, cd) s = s.extra(size=0) s.execute().to_dict() # Waiting between requests to avoid hammering ES too quickly. diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index cf9ea6b145..1fd573c8fb 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -17,7 +17,7 @@ index_daily_recap_documents, ) from cl.alerts.models import SEARCH_TYPES, Alert, ScheduledAlertHit -from cl.alerts.utils import query_includes_rd_field, recap_document_hl_matched +from cl.alerts.utils import recap_document_hl_matched from cl.api.factories import WebhookFactory from cl.api.models import WebhookEvent, WebhookEventType from cl.donate.models import NeonMembership @@ -78,6 +78,7 @@ def setUp(self): self.r = get_redis_interface("CACHE") self.r.delete("alert_sweep:query_date") self.r.delete("alert_sweep:task_id") + self.r.delete("alert_hits:") @staticmethod def get_html_content_from_email(email_content): @@ -257,7 +258,10 @@ async def test_recap_document_hl_matched(self) -> None: "q": '"401 Civil"', } search_query = RECAPSweepDocument.search() - results, total_hits = await sync_to_async(do_es_sweep_alert_query)( + results, parent_results, _ = await sync_to_async( + do_es_sweep_alert_query + )( + search_query, search_query, search_params, ) @@ -272,7 +276,10 @@ async def test_recap_document_hl_matched(self) -> None: "q": '"Mauris iaculis, leo sit amet hendrerit vehicula"', } search_query = RECAPSweepDocument.search() - results, total_hits = await sync_to_async(do_es_sweep_alert_query)( + results, parent_results, _ = await sync_to_async( + do_es_sweep_alert_query + )( + search_query, search_query, search_params, ) @@ -287,7 +294,10 @@ async def test_recap_document_hl_matched(self) -> None: "q": "SUBPOENAS SERVED OFF Mauris iaculis", } search_query = RECAPSweepDocument.search() - results, total_hits = await sync_to_async(do_es_sweep_alert_query)( + results, parent_results, _ = await sync_to_async( + do_es_sweep_alert_query + )( + search_query, search_query, search_params, ) @@ -296,58 +306,6 @@ async def test_recap_document_hl_matched(self) -> None: rd_field_matched = recap_document_hl_matched(rd) self.assertEqual(rd_field_matched, True) - async def test_query_includes_rd_field(self) -> None: - """Test query_includes_rd_field method that checks if a query - includes any indexed fields in the query string or filters specific to - RECAP Documents. - """ - - # Docket-only query - search_params = { - "type": SEARCH_TYPES.RECAP, - "q": '"401 Civil"', - } - self.assertEqual(query_includes_rd_field(search_params), False) - - # RECAPDocument-only query - search_params = { - "type": SEARCH_TYPES.RECAP, - "q": 'description:"lorem ipsum"', - } - self.assertEqual(query_includes_rd_field(search_params), True) - - # Cross-object query - search_params = { - "type": SEARCH_TYPES.RECAP, - "q": 'case_name:"American v." 
description:"lorem ipsum"', - } - self.assertEqual(query_includes_rd_field(search_params), True) - - # Docket-only query - search_params = { - "type": SEARCH_TYPES.RECAP, - "q": "", - "case_name": "SUBPOENAS", - } - self.assertEqual(query_includes_rd_field(search_params), False) - - # RECAPDocument-only query - search_params = { - "type": SEARCH_TYPES.RECAP, - "q": "", - "description": "Lorem", - } - self.assertEqual(query_includes_rd_field(search_params), True) - - # Cross-object query - search_params = { - "type": SEARCH_TYPES.RECAP, - "q": "", - "case_name": "SUBPOENAS", - "document_number": 1, - } - self.assertEqual(query_includes_rd_field(search_params), True) - def test_filter_recap_alerts_to_send(self) -> None: """Test filter RECAP alerts that met the conditions to be sent: - RECAP type alert. @@ -907,6 +865,210 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: docket.delete() + def test_special_cross_object_alerts(self) -> None: + """This test confirms that hits are properly filtered out or included + in alerts for special cross-object alerts that can match either a + Docket-only hit and/or Docket + RDs simultaneously in the same hit. + These cases include queries that use an OR clause combining + Docket field + RD fields or a text query that can match a Docket and + RD field simultaneously. + """ + + # The following test confirms that an alert with a query that can match + # a Docket or RECAPDocuments simultaneously is properly filtered. + cross_object_alert_d_or_rd_field = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object query", + query=f"q=docket_id:{self.de.docket.pk} OR pacer_doc_id:{self.rd_2.pacer_doc_id}&type=r", + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # A new alert should be triggered containing a Docket-only hit and a + # Docket with the nested RD matched. + self.assertEqual( + len(mail.outbox), 1, msg="Outgoing emails don't match." + ) + html_content = self.get_html_content_from_email(mail.outbox[0]) + self._confirm_number_of_alerts(html_content, 1) + # This hit should only display the Docket matched by its ID, + # no RECAPDocument should be matched. + self._assert_child_hits_content( + html_content, + cross_object_alert_d_or_rd_field.name, + self.de.docket.case_name, + [], + ) + + # This hit should display the rd_2 nested below its parent docket. + self._assert_child_hits_content( + html_content, + cross_object_alert_d_or_rd_field.name, + self.de_1.docket.case_name, + [self.rd_2.description], + ) + + # Assert email text version: + txt_email = mail.outbox[0].body + self.assertIn(cross_object_alert_d_or_rd_field.name, txt_email) + self.assertIn(self.rd_2.description, txt_email) + + # This test confirms a text query cross-object alert matches documents + # according to trigger conditions like indexed date and previous triggers + # by the same document. 
+ two_days_before = self.mock_date - datetime.timedelta(days=2) + mock_two_days_before = two_days_before.replace(hour=5) + with time_machine.travel(mock_two_days_before, tick=False): + docket = DocketFactory( + court=self.court, + case_name="United States of America", + docket_number="1:21-bk-1009", + source=Docket.RECAP, + ) + + with time_machine.travel( + self.mock_date, tick=False + ), self.captureOnCommitCallbacks(execute=True): + alert_de = DocketEntryWithParentsFactory( + docket=docket, + entry_number=1, + date_filed=datetime.date(2024, 8, 19), + description="MOTION for Leave to File", + ) + rd_3 = RECAPDocumentFactory( + docket_entry=alert_de, + description="Motion to File New", + document_number="2", + pacer_doc_id="018036652875", + plain_text="United states Lorem", + ) + + docket_2 = DocketFactory( + court=self.court, + case_name="United States of America vs Lorem", + docket_number="1:21-bk-1008", + source=Docket.RECAP, + ) + + cross_object_alert_text = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object query", + query=f'q="United states"&type=r', + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # A new alert should be triggered containing two hits. One matched by + # the rd_3 plain text description and one matched by docket_2 case_name + self.assertEqual( + len(mail.outbox), 2, msg="Outgoing emails don't match." + ) + html_content = self.get_html_content_from_email(mail.outbox[1]) + # rd_3 should appear nested in this hit. + self._confirm_number_of_alerts(html_content, 1) + self._assert_child_hits_content( + html_content, + cross_object_alert_text.name, + docket.case_name, + [rd_3.description], + ) + # The docket_2 hit shouldn't contain RDs. + self._assert_child_hits_content( + html_content, + cross_object_alert_text.name, + docket_2.case_name, + [], + ) + + # Modify the docket today: + with time_machine.travel( + self.mock_date, tick=False + ), self.captureOnCommitCallbacks(execute=True): + docket.cause = "405 Civil" + docket.save() + + # Trigger the alert again: + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # A new alert should be triggered containing docket as a hit with no + # nested RDs. + html_content = self.get_html_content_from_email(mail.outbox[2]) + self.assertEqual( + len(mail.outbox), 3, msg="Outgoing emails don't match." + ) + self._assert_child_hits_content( + html_content, + cross_object_alert_text.name, + docket.case_name, + [], + ) + + # Trigger alert again: + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # No new alerts should be triggered. + self.assertEqual( + len(mail.outbox), 3, msg="Outgoing emails don't match." + ) + + # This test confirms that we're able to trigger cross-object alerts + # that include an OR clause and match documents that belong to the + # same case. 
+        cross_object_alert_d_or_rd_field_same_case = AlertFactory(
+            user=self.user_profile.user,
+            rate=Alert.REAL_TIME,
+            name="Test Alert Cross-object query",
+            query=f"q=docket_id:{self.de.docket.pk} OR pacer_doc_id:{self.rd.pacer_doc_id}&type=r",
+        )
+        with mock.patch(
+            "cl.api.webhooks.requests.post",
+            side_effect=lambda *args, **kwargs: MockResponse(
+                200, mock_raw=True
+            ),
+        ), time_machine.travel(self.mock_date, tick=False):
+            call_command("cl_send_recap_alerts", testing_mode=True)
+
+        # A new alert should be triggered, containing the RD document nested below
+        # its parent docket.
+        html_content = self.get_html_content_from_email(mail.outbox[3])
+        self.assertEqual(
+            len(mail.outbox), 4, msg="Outgoing emails don't match."
+        )
+        self._confirm_number_of_alerts(html_content, 1)
+        self._assert_child_hits_content(
+            html_content,
+            cross_object_alert_d_or_rd_field_same_case.name,
+            self.de.docket.case_name,
+            [self.rd.description],
+        )
+
+        docket.delete()
+        docket_2.delete()
+
     def test_limit_alert_case_child_hits(self) -> None:
         """Test limit case child hits up to 5 and display the
         "View additional results for this Case" button.
diff --git a/cl/alerts/utils.py b/cl/alerts/utils.py
index 44277a04a2..6ad589a913 100644
--- a/cl/alerts/utils.py
+++ b/cl/alerts/utils.py
@@ -169,27 +169,6 @@ def recap_document_hl_matched(rd_hit: Hit) -> bool:
     return False
 
 
-def query_includes_rd_field(query_params: CleanData) -> bool:
-    """Determine whether the query includes any indexed fields in the query
-    string or filters specific to RECAP Documents.
-
-    :param query_params: The query parameters.
-    :return: True if any recap document fields or filters are included in the
-    query, otherwise False.
-    """
-
-    query_string = query_params.get("q", "")
-    for rd_field in recap_document_indexed_fields:
-        if f"{rd_field}:" in query_string:
-            return True
-
-    for rd_filter in recap_document_filters:
-        if query_params.get(rd_filter, ""):
-            return True
-
-    return False
-
-
 def make_alert_set_key(alert_id: int, document_type: str) -> str:
     """Generate a Redis key for storing alert hits.
 
diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py
index 72c982deca..f22b61de47 100644
--- a/cl/lib/elasticsearch_utils.py
+++ b/cl/lib/elasticsearch_utils.py
@@ -1070,7 +1070,7 @@ def build_es_base_query(
     child_highlighting: bool = True,
     api_version: Literal["v3", "v4"] | None = None,
     alerts: bool = False,
-) -> tuple[Search, QueryString | None]:
+) -> tuple[Search, QueryString | None, QueryString | None]:
     """Builds filters and fulltext_query based on the given cleaned data
     and returns an elasticsearch query.
 
@@ -1079,14 +1079,15 @@ def build_es_base_query(
     :param child_highlighting: Whether highlighting should be enabled in child docs.
     :param api_version: Optional, the request API version.
     :param alerts: If highlighting is being applied to search Alerts hits.
-    :return: A two-tuple, the Elasticsearch search query object and an ES
-    QueryString for child documents, or None if there is no need to query
-    child documents.
+    :return: A three-tuple: the Elasticsearch search query object, an ES
+    QueryString for child documents (or None if there is no need to query
+    child documents), and a QueryString for parent documents (or None).
""" main_query = None string_query = None - join_query = None + child_docs_query = None + parent_query = None filters = [] plain_doc = False match cd["type"]: @@ -1131,12 +1132,14 @@ def build_es_base_query( ], ) ) - main_query, join_query = build_full_join_es_queries( - cd, - child_query_fields, - parent_query_fields, - child_highlighting=child_highlighting, - api_version=api_version, + main_query, child_docs_query, parent_query = ( + build_full_join_es_queries( + cd, + child_query_fields, + parent_query_fields, + child_highlighting=child_highlighting, + api_version=api_version, + ) ) case ( @@ -1167,13 +1170,15 @@ def build_es_base_query( ], ) ) - main_query, join_query = build_full_join_es_queries( - cd, - child_query_fields, - parent_query_fields, - child_highlighting=child_highlighting, - api_version=api_version, - alerts=alerts, + main_query, child_docs_query, parent_query = ( + build_full_join_es_queries( + cd, + child_query_fields, + parent_query_fields, + child_highlighting=child_highlighting, + api_version=api_version, + alerts=alerts, + ) ) case SEARCH_TYPES.OPINION: @@ -1207,13 +1212,15 @@ def build_es_base_query( ], ) ) - main_query, join_query = build_full_join_es_queries( - cd, - child_query_fields, - parent_query_fields, - mlt_query, - child_highlighting=child_highlighting, - api_version=api_version, + main_query, child_docs_query, parent_query = ( + build_full_join_es_queries( + cd, + child_query_fields, + parent_query_fields, + mlt_query, + child_highlighting=child_highlighting, + api_version=api_version, + ) ) if not any([filters, string_query, main_query]): @@ -1222,7 +1229,7 @@ def build_es_base_query( match_all_query = get_match_all_query( cd, search_query, api_version, child_highlighting ) - return match_all_query, join_query + return match_all_query, child_docs_query, parent_query if plain_doc: # Combine the filters and string query for plain documents like Oral @@ -1231,7 +1238,7 @@ def build_es_base_query( cd, filters, string_query, api_version ) - return search_query.query(main_query), join_query + return search_query.query(main_query), child_docs_query, parent_query def build_has_parent_parties_query( @@ -1261,7 +1268,7 @@ def build_has_parent_parties_query( def build_child_docs_query( - join_query: QueryString | None, + child_docs_query: QueryString | None, cd: CleanData, exclude_docs_for_empty_field: str = "", ) -> QueryString: @@ -1271,7 +1278,7 @@ def build_child_docs_query( to retrieve child documents directly, such as in the Opinions Feed, RECAP Feed, RECAP Documents count query, and V4 RECAP_DOCUMENT Search API. - :param join_query: Existing Elasticsearch QueryString object or None + :param child_docs_query: Existing Elasticsearch QueryString object or None :param cd: The user input CleanedData :param exclude_docs_for_empty_field: Field that should not be empty for a document to be included @@ -1289,7 +1296,7 @@ def build_child_docs_query( ] parties_has_parent_query = build_has_parent_parties_query(parties_filters) - if not join_query: + if not child_docs_query: # Match all query case. 
if not exclude_docs_for_empty_field: if cd["type"] == SEARCH_TYPES.OPINION: @@ -1311,7 +1318,7 @@ def build_child_docs_query( filters.append(child_query_recap) return Q("bool", filter=filters) - query_dict = join_query.to_dict() + query_dict = child_docs_query.to_dict() if "filter" in query_dict["bool"]: existing_filter = query_dict["bool"]["filter"] if cd["type"] == SEARCH_TYPES.OPINION: @@ -1373,7 +1380,7 @@ def get_facet_dict_for_search_query( """ cd["just_facets_query"] = True - search_query, _ = build_es_base_query(search_query, cd) + search_query, _, _ = build_es_base_query(search_query, cd) search_query.aggs.bucket("status", A("terms", field="status.raw")) search_query = search_query.extra(size=0) response = search_query.execute() @@ -1395,7 +1402,7 @@ def build_es_main_query( applicable. """ search_query_base = search_query - search_query, join_query = build_es_base_query(search_query, cd) + search_query, child_docs_query, _ = build_es_base_query(search_query, cd) top_hits_limit = 5 child_docs_count_query = None match cd["type"]: @@ -1413,7 +1420,9 @@ def build_es_main_query( top_hits_limit, ) case SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS: - child_docs_count_query = build_child_docs_query(join_query, cd) + child_docs_count_query = build_child_docs_query( + child_docs_query, cd + ) if child_docs_count_query: # Get the total RECAP Documents count. child_docs_count_query = search_query_base.query( @@ -2214,13 +2223,13 @@ def build_search_feed_query( hl_field = "text" if cd["type"] == SEARCH_TYPES.RECAP: hl_field = "plain_text" - s, join_query = build_es_base_query(search_query, cd) + s, child_docs_query, _ = build_es_base_query(search_query, cd) if jurisdiction or cd["type"] == SEARCH_TYPES.RECAP: # An Opinion Jurisdiction feed or RECAP Search displays child documents # Eliminate items that lack the ordering field and apply highlighting # to create a snippet for the plain_text or text fields. s = build_child_docs_query( - join_query, + child_docs_query, cd=cd, exclude_docs_for_empty_field=exclude_docs_for_empty_field, ) @@ -2336,7 +2345,7 @@ def build_full_join_es_queries( child_highlighting: bool = True, api_version: Literal["v3", "v4"] | None = None, alerts: bool = False, -) -> tuple[QueryString | list, QueryString | None]: +) -> tuple[QueryString | list, QueryString | None, QueryString | None]: """Build a complete Elasticsearch query with both parent and child document conditions. @@ -2347,7 +2356,8 @@ def build_full_join_es_queries( :param child_highlighting: Whether highlighting should be enabled in child docs. :param api_version: Optional, the request API version. :param alerts: If highlighting is being applied to search Alerts hits. - :return: An Elasticsearch QueryString object. + :return: A three-tuple: the main join query, the child documents query, and + the parent documents query. 
""" q_should = [] @@ -2363,7 +2373,8 @@ def build_full_join_es_queries( case SEARCH_TYPES.PEOPLE: child_type = "position" - join_query = None + child_docs_query = None + parent_query = None if cd["type"] in [ SEARCH_TYPES.RECAP, SEARCH_TYPES.DOCKETS, @@ -2419,18 +2430,18 @@ def build_full_join_es_queries( case [], []: pass case [], _: - join_query = Q( + child_docs_query = Q( "bool", should=child_text_query, minimum_should_match=1, ) case _, []: - join_query = Q( + child_docs_query = Q( "bool", filter=child_filters, ) case _, _: - join_query = Q( + child_docs_query = Q( "bool", filter=child_filters, should=child_text_query, @@ -2446,7 +2457,7 @@ def build_full_join_es_queries( (child_highlighting, cd["type"]), {} ) has_child_query = build_has_child_query( - join_query, + child_docs_query, child_type, query_hits_limit, hl_fields, @@ -2525,9 +2536,9 @@ def build_full_join_es_queries( q_should.append(parent_query) if not q_should: - return [], join_query + return [], child_docs_query, parent_query - final_query = apply_custom_score_to_main_query( + main_join_query = apply_custom_score_to_main_query( cd, Q( "bool", @@ -2535,10 +2546,7 @@ def build_full_join_es_queries( ), api_version, ) - return ( - final_query, - join_query, - ) + return (main_join_query, child_docs_query, parent_query) def limit_inner_hits( @@ -2859,10 +2867,8 @@ def do_es_api_query( child documents. """ - child_docs_query = None - try: - s, join_query = build_es_base_query( + s, child_docs_query, _ = build_es_base_query( search_query, cd, cd["highlight"], api_version ) except ( @@ -2881,7 +2887,7 @@ def do_es_api_query( # Note that in V3 Case Law Search, opinions are collapsed by cluster_id # meaning that only one result per cluster is shown. s = build_child_docs_query( - join_query, + child_docs_query, cd=cd, ) main_query = search_query.query(s) @@ -2917,7 +2923,7 @@ def do_es_api_query( ) else: child_docs_query = build_child_docs_query( - join_query, + child_docs_query, cd=cd, ) # Build query params for the ES V4 Search API endpoints. @@ -3032,7 +3038,7 @@ def do_es_alert_estimation_query( days=int(day_count) ) cd[before_field] = None - estimation_query, _ = build_es_base_query(search_query, cd) + estimation_query, _, _ = build_es_base_query(search_query, cd) if cd["type"] == SEARCH_TYPES.RECAP: # The RECAP estimation query consists of two requests: one to estimate @@ -3054,8 +3060,8 @@ def do_es_alert_estimation_query( multi_search = multi_search.add(main_doc_count_query) # Build RECAPDocuments count query. - _, join_query = build_es_base_query(search_query, cd) - child_docs_count_query = build_child_docs_query(join_query, cd) + _, child_docs_query, _ = build_es_base_query(search_query, cd) + child_docs_count_query = build_child_docs_query(child_docs_query, cd) child_total = 0 if child_docs_count_query: child_docs_count_query = search_query.query(child_docs_count_query) @@ -3077,11 +3083,14 @@ def do_es_alert_estimation_query( def do_es_sweep_alert_query( search_query: Search, + child_search_query: Search, cd: CleanData, -) -> tuple[list[Hit] | None, int | None]: +) -> tuple[list[Hit] | None, Response | None, Response | None]: """Build an ES query for its use in the daily RECAP sweep index. :param search_query: Elasticsearch DSL Search object. + :param child_search_query: The Elasticsearch DSL search query to perform + the child-only query. 
:param cd: The query CleanedData :return: A two-tuple, the Elasticsearch search query object and an ES Query for child documents, or None if there is no need to query @@ -3092,29 +3101,54 @@ def do_es_sweep_alert_query( if search_form.is_valid(): cd = search_form.cleaned_data else: - return None, None - - total_hits = None - - s, _ = build_es_base_query(search_query, cd, True, alerts=True) + return None, None, None + s, child_query, parent_query = build_es_base_query( + search_query, cd, True, alerts=True + ) main_query = add_es_highlighting(s, cd, alerts=True) main_query = main_query.sort(build_sort_results(cd)) main_query = main_query.extra( from_=0, size=settings.SCHEDULED_ALERT_HITS_LIMIT ) - results = main_query.execute() - if results: - total_hits = results.hits.total.value - - limit_inner_hits({}, results, cd["type"]) - set_results_highlights(results, cd["type"]) - for result in results: + multi_search = MultiSearch() + multi_search = multi_search.add(main_query) + if parent_query: + parent_search = search_query.query(parent_query) + parent_search = parent_search.extra( + from_=0, size=settings.SCHEDULED_ALERT_HITS_LIMIT + ) + parent_search = parent_search.source(includes=["docket_id"]) + multi_search = multi_search.add(parent_search) + + if child_query: + child_search = child_search_query.query(child_query) + child_search = child_search.extra( + from_=0, + size=settings.SCHEDULED_ALERT_HITS_LIMIT + * settings.RECAP_CHILD_HITS_PER_RESULT, + ) + child_search = child_search.source(includes=["id"]) + multi_search = multi_search.add(child_search) + + responses = multi_search.execute() + main_results = responses[0] + rd_results = None + docket_results = None + if parent_query: + docket_results = responses[1] + if child_query: + rd_results = responses[2] + + limit_inner_hits({}, main_results, cd["type"]) + set_results_highlights(main_results, cd["type"]) + + for result in main_results: child_result_objects = [] if hasattr(result, "child_docs"): for child_doc in result.child_docs: child_result_objects.append(child_doc.to_dict()) result["child_docs"] = child_result_objects - return results, total_hits + return main_results, docket_results, rd_results diff --git a/cl/search/documents.py b/cl/search/documents.py index 378dbb9477..cedc638170 100644 --- a/cl/search/documents.py +++ b/cl/search/documents.py @@ -4,6 +4,7 @@ from django.http import QueryDict from django.utils.html import escape, strip_tags from django_elasticsearch_dsl import Document, fields +from elasticsearch_dsl import Document as DSLDocument from cl.alerts.models import Alert from cl.audio.models import Audio @@ -364,7 +365,7 @@ def prepare_percolator_query(self, instance): cd = search_form.cleaned_data search_query = AudioDocument.search() - query, _ = build_es_base_query(search_query, cd) + query, _, _ = build_es_base_query(search_query, cd) return query.to_dict()["query"] @@ -961,8 +962,7 @@ def prepare_timestamp(self, instance): return datetime.utcnow() -@recap_index.document -class ESRECAPDocument(DocketBaseDocument): +class ESRECAPBaseDocument(DSLDocument): id = fields.IntegerField(attr="pk") docket_entry_id = fields.IntegerField(attr="docket_entry.pk") description = fields.TextField( @@ -1030,6 +1030,10 @@ class ESRECAPDocument(DocketBaseDocument): fields.IntegerField(multi=True), ) + +@recap_index.document +class ESRECAPDocument(DocketBaseDocument, ESRECAPBaseDocument): + class Django: model = RECAPDocument ignore_signals = True @@ -1837,3 +1841,14 @@ class Index: "number_of_replicas": 
settings.ELASTICSEARCH_RECAP_NUMBER_OF_REPLICAS, "analysis": settings.ELASTICSEARCH_DSL["analysis"], } + + +class ESRECAPSweepDocument(ESRECAPBaseDocument): + + class Index: + name = "recap_document_sweep" + settings = { + "number_of_shards": settings.ELASTICSEARCH_RECAP_NUMBER_OF_SHARDS, + "number_of_replicas": settings.ELASTICSEARCH_RECAP_NUMBER_OF_REPLICAS, + "analysis": settings.ELASTICSEARCH_DSL["analysis"], + } diff --git a/cl/search/tests/tests_es_oral_arguments.py b/cl/search/tests/tests_es_oral_arguments.py index f47453b7bc..a8f2db4541 100644 --- a/cl/search/tests/tests_es_oral_arguments.py +++ b/cl/search/tests/tests_es_oral_arguments.py @@ -984,7 +984,7 @@ def confirm_query_matched(response, query_id) -> bool: @staticmethod def save_percolator_query(cd): search_query = AudioDocument.search() - query, _ = build_es_base_query(search_query, cd) + query, _, _ = build_es_base_query(search_query, cd) query_dict = query.to_dict()["query"] percolator_query = AudioPercolator( percolator_query=query_dict, rate=Alert.REAL_TIME diff --git a/cl/search/tests/tests_es_person.py b/cl/search/tests/tests_es_person.py index 0eb72bfe96..ef994b9a50 100644 --- a/cl/search/tests/tests_es_person.py +++ b/cl/search/tests/tests_es_person.py @@ -1342,7 +1342,7 @@ def test_has_child_filters(self) -> None: "type": SEARCH_TYPES.PEOPLE, } s = PersonDocument.search() - main_query, _ = build_es_base_query(s, cd) + main_query, _, _ = build_es_base_query(s, cd) self.assertEqual(main_query.count(), 2) # Query by parent field dob_state and child field selection_method. @@ -1352,7 +1352,7 @@ def test_has_child_filters(self) -> None: "type": SEARCH_TYPES.PEOPLE, } s = PersonDocument.search() - main_query, _ = build_es_base_query(s, cd) + main_query, _, _ = build_es_base_query(s, cd) self.assertEqual(main_query.count(), 1) position_5.delete() From a4e4e62250184b1de9b754cceff5cf60deb19723 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Wed, 10 Jul 2024 16:42:27 -0600 Subject: [PATCH 19/33] fix(alerts): Added more tests related to filtering cross-object hits. - Fixed issues and improved command resumability --- .../commands/cl_send_recap_alerts.py | 115 +++++--- cl/alerts/tests/tests_recap_alerts.py | 265 ++++++++++++++---- 2 files changed, 290 insertions(+), 90 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index 763651c985..7f05ce4291 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -2,7 +2,7 @@ import datetime import time import traceback -from typing import Any, Type +from typing import Any, Literal, Type import pytz from asgiref.sync import async_to_sync @@ -140,12 +140,21 @@ def index_daily_recap_documents( :return: The total number of documents re-indexed. """ - if r.exists("alert_sweep:re_index_completed"): + if r.exists("alert_sweep:main_re_index_completed"): logger.info( - "The re-index task has been completed and will be omitted." + "The main re-index task has been completed and will be omitted." ) - # The re-indexing has been completed for the day. Abort it and proceed - # with sending alerts. + # The main re-indexing has been completed for the day. Abort it and + # proceed with RECAPDocument re-index. + return 0 + + if r.exists("alert_sweep:rd_re_index_completed"): + logger.info( + "The RECAPDocument only re-index task has been completed and will " + "be omitted." + ) + # The RECAPDocument re-indexing has been completed for the day. 
Abort
+        # it and proceed with sending alerts.
+        return 0
 
     if not r.exists("alert_sweep:query_date"):
@@ -368,7 +377,7 @@ def fields = [
 
 
 def should_docket_hit_be_included(
-    r: Redis, alert_id: int, docket_id: int
+    r: Redis, alert_id: int, docket_id: int, query_date: datetime.date
 ) -> bool:
     """Determine if a Docket alert should be triggered based on its
     date_modified and if the docket has triggered the alert previously.
@@ -376,21 +385,19 @@
     :param r: The Redis interface.
     :param alert_id: The ID of the alert.
     :param docket_id: The ID of the docket.
+    :param query_date: The daily re_index query date.
     :return: True if the Docket alert should be triggered, False otherwise.
     """
     docket = Docket.objects.filter(id=docket_id).only("date_modified").first()
     if not docket:
         return False
    if not has_document_alert_hit_been_triggered(r, alert_id, "d", docket_id):
-        local_midnight_localized = timezone.localtime(
-            timezone.make_aware(
-                datetime.datetime.fromisoformat(
-                    str(r.get("alert_sweep:query_date"))
-                )
-            )
-        )
+        # Confirm the docket was modified on the day we're sending alerts.
+        # Otherwise, RECAPDocuments indexed during the day could trigger
+        # docket-only alerts, since RDs index several docket fields that
+        # docket-only queries can match.
         date_modified_localized = dt_as_local_date(docket.date_modified)
-        if date_modified_localized == local_midnight_localized.date():
+        if date_modified_localized == query_date:
             return True
     return False
 
@@ -464,6 +471,7 @@ def process_alert_hits(
     parent_results: Response | None,
     child_results: Response | None,
     alert_id: int,
+    query_date: datetime.date,
 ) -> list[Hit]:
     """Process alert hits by filtering and prepare the results to send
     based on alert conditions.
@@ -473,9 +481,9 @@
     :param parent_results: The ES Response for the docket-only query.
     :param child_results: The ES Response for the RECAPDocument-only query.
     :param alert_id: The ID of the alert being processed.
+    :param query_date: The daily re_index query date.
     :return: A list of Hit objects that are filtered and prepared to be sent.
     """
-
     docket_hits = parent_results.hits if parent_results else []
     docket_ids = [int(d.docket_id) for d in docket_hits]
 
@@ -490,32 +498,31 @@
                     r, alert_id, hit["child_docs"], rd_ids, check_rd_hl=True
                 )
                 if rds_to_send:
-                    # Cross-object query
+                    # Docket OR RECAPDocument alert.
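Once dt_as_local_date has localized the docket's date_modified, the gate
above is a plain date equality. A self-contained sketch (timezone handling
simplified):

    import datetime

    def modified_on_query_date(
        date_modified: datetime.datetime, query_date: datetime.date
    ) -> bool:
        # A docket-only hit fires only if the docket itself changed on the
        # day the sweep covers.
        return date_modified.date() == query_date

    assert modified_on_query_date(
        datetime.datetime(2024, 8, 19, 23, 50), datetime.date(2024, 8, 19)
    )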
hit["child_docs"] = rds_to_send results_to_send.append(hit) if should_docket_hit_be_included( - r, alert_id, hit.docket_id + r, alert_id, hit.docket_id, query_date ): add_document_hit_to_alert_set( r, alert_id, "d", hit.docket_id ) - # Docket-only alert - elif should_docket_hit_be_included(r, alert_id, hit.docket_id): + elif should_docket_hit_be_included( + r, alert_id, hit.docket_id, query_date + ): # Docket-only alert hit["child_docs"] = [] results_to_send.append(hit) add_document_hit_to_alert_set( r, alert_id, "d", hit.docket_id ) - else: - # RECAP-only alerts or cross-object alerts + # RECAPDocument-only alerts or cross-object alerts rds_to_send = filter_rd_alert_hits( r, alert_id, hit["child_docs"], rd_ids ) if rds_to_send: - # Cross-object alert hit["child_docs"] = rds_to_send results_to_send.append(hit) return results_to_send @@ -541,7 +548,18 @@ def send_search_alert_webhooks( ) -def query_and_send_alerts(r: Redis, rate: str) -> None: +def query_and_send_alerts( + r: Redis, rate: Literal["rt", "dly"], query_date: datetime.date +) -> None: + """Query the sweep index and send alerts based on the specified rate + and date. + + :param r: The Redis interface. + :param rate: The rate at which to query alerts. + :param query_date: The daily re_index query date. + :return: None. + """ + alert_users: UserProfile.user = User.objects.filter( alerts__rate=rate ).distinct() @@ -566,11 +584,7 @@ def query_and_send_alerts(r: Redis, rate: str) -> None: alerts_to_update.append(alert.pk) search_type = search_params.get("type", SEARCH_TYPES.RECAP) results_to_send = process_alert_hits( - r, - results, - parent_results, - child_results, - alert.pk, + r, results, parent_results, child_results, alert.pk, query_date ) if results_to_send: hits.append( @@ -600,7 +614,18 @@ def query_and_send_alerts(r: Redis, rate: str) -> None: logger.info(f"Sent {alerts_sent_count} {rate} email alerts.") -def query_and_schedule_alerts(r: Redis, rate: str): +def query_and_schedule_alerts( + r: Redis, rate: Literal["wly", "mly"], query_date: datetime.date +) -> None: + """Query the sweep index and schedule alerts based on the specified rate + and date. + + :param r: The Redis interface. + :param rate: The rate at which to query alerts. + :param query_date: The daily re_index query date. + :return: None. + """ + alert_users = User.objects.filter(alerts__rate=rate).distinct() for user in alert_users: alerts = user.alerts.filter(rate=rate, alert_type=SEARCH_TYPES.RECAP) @@ -615,11 +640,7 @@ def query_and_schedule_alerts(r: Redis, rate: str): continue results_to_send = process_alert_hits( - r, - results, - parent_results, - child_results, - alert.pk, + r, results, parent_results, child_results, alert.pk, query_date ) if results_to_send: for hit in results_to_send: @@ -678,6 +699,10 @@ def handle(self, *args, **options): RECAPSweepDocument, testing=testing_mode, ) + if not testing_mode: + # main_re_index_completed key so the main re_index task can be + # omitted in case of a failure. + r.set("alert_sweep:main_re_index_completed", 1, ex=3600 * 12) index_daily_recap_documents( r, DocketDocument._index._name, @@ -686,11 +711,21 @@ def handle(self, *args, **options): only_rd=True, ) if not testing_mode: - r.set("alert_sweep:re_index_completed", 1, ex=3600 * 12) + # rd_re_index_completed key so the RECAPDocument re_index task + # can be omitted in case of a failure. 
+ r.set("alert_sweep:rd_re_index_completed", 1, ex=3600 * 12) - query_and_send_alerts(r, Alert.REAL_TIME) - query_and_send_alerts(r, Alert.DAILY) - query_and_schedule_alerts(r, Alert.WEEKLY) - query_and_schedule_alerts(r, Alert.MONTHLY) - r.delete("alert_sweep:re_index_completed") + query_date = timezone.localtime( + timezone.make_aware( + datetime.datetime.fromisoformat( + str(r.get("alert_sweep:query_date")) + ) + ) + ).date() + query_and_send_alerts(r, Alert.REAL_TIME, query_date) + query_and_send_alerts(r, Alert.DAILY, query_date) + query_and_schedule_alerts(r, Alert.WEEKLY, query_date) + query_and_schedule_alerts(r, Alert.MONTHLY, query_date) + r.delete("alert_sweep:main_re_index_completed") + r.delete("alert_sweep:rd_re_index_completed") r.delete("alert_sweep:query_date") diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index 1fd573c8fb..a661347975 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -78,7 +78,9 @@ def setUp(self): self.r = get_redis_interface("CACHE") self.r.delete("alert_sweep:query_date") self.r.delete("alert_sweep:task_id") - self.r.delete("alert_hits:") + keys = self.r.keys("alert_hits:*") + if keys: + self.r.delete(*keys) @staticmethod def get_html_content_from_email(email_content): @@ -141,9 +143,7 @@ def _count_alert_hits_and_child_hits( self.assertTrue( alert_element, msg=f"Not alert with title {alert_title} found." ) - alert_cases = self._extract_cases_from_alert(tree, alert_title) - self.assertEqual( len(alert_cases), expected_hits, @@ -152,21 +152,23 @@ def _count_alert_hits_and_child_hits( % (alert_title, expected_hits, len(alert_cases)), ) if case_title: - child_hit_count = 0 for case in alert_cases: - case_text = " ".join(case.xpath(".//text()")).strip() + child_hit_count = 0 + case_text = " ".join( + [element.strip() for element in case.xpath(".//text()")] + ) if case_title in case_text: child_hit_count = len( case.xpath("following-sibling::ul[1]/li/a") ) - - self.assertEqual( - child_hit_count, - expected_child_hits, - msg="Did not get the right number of child hits for the case %s. " - "Expected: %s - Got: %s\n\n" - % (case_title, expected_child_hits, child_hit_count), - ) + self.assertEqual( + child_hit_count, + expected_child_hits, + msg="Did not get the right number of child hits for the case %s. " + "Expected: %s - Got: %s\n\n" + % (case_title, expected_child_hits, child_hit_count), + ) + break def _assert_child_hits_content( self, @@ -865,17 +867,17 @@ def test_filter_out_alerts_to_send_by_query_and_hits(self) -> None: docket.delete() - def test_special_cross_object_alerts(self) -> None: + def test_special_cross_object_alerts_or_clause(self) -> None: """This test confirms that hits are properly filtered out or included in alerts for special cross-object alerts that can match either a Docket-only hit and/or Docket + RDs simultaneously in the same hit. These cases include queries that use an OR clause combining - Docket field + RD fields or a text query that can match a Docket and - RD field simultaneously. + Docket field + RD fields. """ # The following test confirms that an alert with a query that can match - # a Docket or RECAPDocuments simultaneously is properly filtered. + # a Docket or RECAPDocuments from different cases simultaneously are + # properly filtered. 
cross_object_alert_d_or_rd_field = AlertFactory( user=self.user_profile.user, rate=Alert.REAL_TIME, @@ -897,34 +899,88 @@ def test_special_cross_object_alerts(self) -> None: ) html_content = self.get_html_content_from_email(mail.outbox[0]) self._confirm_number_of_alerts(html_content, 1) + # This hit should only display the Docket matched by its ID, # no RECAPDocument should be matched. - self._assert_child_hits_content( + self._count_alert_hits_and_child_hits( html_content, cross_object_alert_d_or_rd_field.name, + 2, self.de.docket.case_name, - [], + 0, ) - - # This hit should display the rd_2 nested below its parent docket. + # The second hit should display the rd_2 nested below its parent docket. self._assert_child_hits_content( html_content, cross_object_alert_d_or_rd_field.name, self.de_1.docket.case_name, [self.rd_2.description], ) - # Assert email text version: txt_email = mail.outbox[0].body self.assertIn(cross_object_alert_d_or_rd_field.name, txt_email) self.assertIn(self.rd_2.description, txt_email) + # This test confirms that we're able to trigger cross-object alerts + # that include an OR clause and match documents that belong to the + # same case. + cross_object_alert_d_or_rd_field_same_case = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object query", + query=f"q=docket_id:{self.de.docket.pk} OR pacer_doc_id:{self.rd.pacer_doc_id}&type=r", + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # A new alert should be triggered, containing the RD document nested + # below its parent docket. + self.assertEqual( + len(mail.outbox), 2, msg="Outgoing emails don't match." + ) + html_content = self.get_html_content_from_email(mail.outbox[1]) + self._confirm_number_of_alerts(html_content, 1) + self._count_alert_hits_and_child_hits( + html_content, + cross_object_alert_d_or_rd_field.name, + 1, + self.de.docket.case_name, + 1, + ) + self._assert_child_hits_content( + html_content, + cross_object_alert_d_or_rd_field_same_case.name, + self.de.docket.case_name, + [self.rd.description], + ) + + def test_special_cross_object_alerts_text_query(self) -> None: + """This test confirms that hits are properly filtered out or included + in alerts for special cross-object alerts that can match either a + Docket-only hit and/or Docket + RDs simultaneously in the same hit. + These cases include queries that use a text query that can match a + Docket and RD field simultaneously. + """ + # This test confirms a text query cross-object alert matches documents # according to trigger conditions like indexed date and previous triggers # by the same document. 
+ cross_object_alert_text = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object text query", + query=f'q="United states"&type=r', + ) two_days_before = self.mock_date - datetime.timedelta(days=2) mock_two_days_before = two_days_before.replace(hour=5) - with time_machine.travel(mock_two_days_before, tick=False): + with time_machine.travel( + mock_two_days_before, tick=False + ), self.captureOnCommitCallbacks(execute=True): docket = DocketFactory( court=self.court, case_name="United States of America", @@ -932,6 +988,22 @@ def test_special_cross_object_alerts(self) -> None: source=Docket.RECAP, ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # No alert should be triggered since the matched docket was not + # modified during the current day. + self.assertEqual( + len(mail.outbox), 0, msg="Outgoing emails don't match." + ) + + # Index new documents that match cross_object_alert_text, an RD, and + # an empty docket. with time_machine.travel( self.mock_date, tick=False ), self.captureOnCommitCallbacks(execute=True): @@ -951,17 +1023,11 @@ def test_special_cross_object_alerts(self) -> None: docket_2 = DocketFactory( court=self.court, - case_name="United States of America vs Lorem", + case_name="United States vs Lorem", docket_number="1:21-bk-1008", source=Docket.RECAP, ) - cross_object_alert_text = AlertFactory( - user=self.user_profile.user, - rate=Alert.REAL_TIME, - name="Test Alert Cross-object query", - query=f'q="United states"&type=r', - ) with mock.patch( "cl.api.webhooks.requests.post", side_effect=lambda *args, **kwargs: MockResponse( @@ -970,14 +1036,21 @@ def test_special_cross_object_alerts(self) -> None: ), time_machine.travel(self.mock_date, tick=False): call_command("cl_send_recap_alerts", testing_mode=True) - # A new alert should be triggered containing two hits. One matched by + # An alert should be triggered containing two hits. One matched by # the rd_3 plain text description and one matched by docket_2 case_name self.assertEqual( - len(mail.outbox), 2, msg="Outgoing emails don't match." + len(mail.outbox), 1, msg="Outgoing emails don't match." ) - html_content = self.get_html_content_from_email(mail.outbox[1]) - # rd_3 should appear nested in this hit. + html_content = self.get_html_content_from_email(mail.outbox[0]) self._confirm_number_of_alerts(html_content, 1) + self._count_alert_hits_and_child_hits( + html_content, + cross_object_alert_text.name, + 2, + docket.case_name, + 1, + ) + # rd_3 should appear nested in this hit. self._assert_child_hits_content( html_content, cross_object_alert_text.name, @@ -991,8 +1064,7 @@ def test_special_cross_object_alerts(self) -> None: docket_2.case_name, [], ) - - # Modify the docket today: + # Modify 1:21-bk-1009 docket today: with time_machine.travel( self.mock_date, tick=False ), self.captureOnCommitCallbacks(execute=True): @@ -1008,11 +1080,19 @@ def test_special_cross_object_alerts(self) -> None: ), time_machine.travel(self.mock_date, tick=False): call_command("cl_send_recap_alerts", testing_mode=True) - # A new alert should be triggered containing docket as a hit with no - # nested RDs. - html_content = self.get_html_content_from_email(mail.outbox[2]) + # A new alert should be triggered containing the docket as a hit with + # no nested RDs. 
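These assertions are only deterministic because time_machine pins "now" for
both indexing and the sweep, so date-based trigger conditions can be tested
reliably. The mechanism in isolation:

    import datetime
    import time_machine

    # Everything inside the block sees the frozen clock, so documents
    # "indexed" here carry a predictable date relative to query_date.
    with time_machine.travel(datetime.datetime(2024, 8, 19, 12, 0), tick=False):
        assert datetime.date.today() == datetime.date(2024, 8, 19)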
+ html_content = self.get_html_content_from_email(mail.outbox[1]) self.assertEqual( - len(mail.outbox), 3, msg="Outgoing emails don't match." + len(mail.outbox), 2, msg="Outgoing emails don't match." + ) + self._confirm_number_of_alerts(html_content, 1) + self._count_alert_hits_and_child_hits( + html_content, + cross_object_alert_text.name, + 1, + docket.case_name, + 0, ) self._assert_child_hits_content( html_content, @@ -1029,20 +1109,41 @@ def test_special_cross_object_alerts(self) -> None: ), ), time_machine.travel(self.mock_date, tick=False): call_command("cl_send_recap_alerts", testing_mode=True) - # No new alerts should be triggered. self.assertEqual( - len(mail.outbox), 3, msg="Outgoing emails don't match." + len(mail.outbox), 2, msg="Outgoing emails don't match." ) + # Index new documents that match cross_object_alert_text, an RD, and + # an empty docket. + with time_machine.travel( + self.mock_date, tick=False + ), self.captureOnCommitCallbacks(execute=True): + rd_4 = RECAPDocumentFactory( + docket_entry=alert_de, + description="Hearing new", + document_number="3", + pacer_doc_id="0180366528790", + plain_text="Lorem ipsum", + ) + rd_5 = RECAPDocumentFactory( + docket_entry=alert_de, + description="Hearing new 2", + document_number="4", + pacer_doc_id="018026657750", + plain_text="United states of america plain text", + ) + # This test confirms that we're able to trigger cross-object alerts - # that include an OR clause and match documents that belong to the - # same case. - cross_object_alert_d_or_rd_field_same_case = AlertFactory( + # that include an OR clause and a cross-object text query. + cross_object_alert_d_or_rd_field_text_query = AlertFactory( user=self.user_profile.user, rate=Alert.REAL_TIME, - name="Test Alert Cross-object query", - query=f"q=docket_id:{self.de.docket.pk} OR pacer_doc_id:{self.rd.pacer_doc_id}&type=r", + name="Test Alert Cross-object query combined.", + query=f"q=docket_id:{self.de.docket.pk} OR " + f"pacer_doc_id:{self.rd.pacer_doc_id} OR " + f'("United States of America" OR ' + f"pacer_doc_id:{rd_3.pacer_doc_id})&type=r", ) with mock.patch( "cl.api.webhooks.requests.post", @@ -1054,16 +1155,80 @@ def test_special_cross_object_alerts(self) -> None: # A new alert should be triggered, containing the RD document nested below # its parent docket. + html_content = self.get_html_content_from_email(mail.outbox[2]) + self.assertEqual( + len(mail.outbox), 3, msg="Outgoing emails don't match." + ) + # The email contains two alerts: one for cross_object_alert_text + # triggered by the new rd_5 added, and one for cross_object_alert_d_or_rd_field_text_query. + self._confirm_number_of_alerts(html_content, 2) + self._count_alert_hits_and_child_hits( + html_content, + cross_object_alert_text.name, + 1, + docket.case_name, + 1, + ) + # The cross_object_alert_d_or_rd_field_text_query alert contains two + # hits. The first one matches "docket" and rd_3 and rd_5 nested below + # due to the OR clause in the text query, and the second hit matches + # self.de.docket and self.rd. 
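A hedged sketch of how a q parameter like this one, mixing quoted phrases
with OR groups, can be expressed as a single Elasticsearch query_string
query (the field list here is abbreviated and assumed):

    from elasticsearch_dsl import Q

    q = Q(
        "query_string",
        query='docket_id:123 OR ("United States of America" OR pacer_doc_id:0180366528790)',
        fields=["caseName", "plain_text"],
        default_operator="AND",
    )
    # The boolean structure lets one hit match on docket fields alone while
    # another matches only through RECAPDocument text.
    print(q.to_dict())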
+ self._count_alert_hits_and_child_hits( + html_content, + cross_object_alert_d_or_rd_field_text_query.name, + 2, + docket.case_name, + 2, + ) + self._assert_child_hits_content( + html_content, + cross_object_alert_d_or_rd_field_text_query.name, + docket.case_name, + [rd_3.description, rd_5.description], + ) + self._assert_child_hits_content( + html_content, + cross_object_alert_d_or_rd_field_text_query.name, + self.de.docket.case_name, + [self.rd.description], + ) + + # This test confirms that hits are properly filtered when using AND in + # the text query. + cross_object_alert_d_or_rd_field_text_query_and = AlertFactory( + user=self.user_profile.user, + rate=Alert.REAL_TIME, + name="Test Alert Cross-object query combined.", + query=f'q=("United States of America" AND ' + f"pacer_doc_id:{rd_3.pacer_doc_id})&type=r", + ) + with mock.patch( + "cl.api.webhooks.requests.post", + side_effect=lambda *args, **kwargs: MockResponse( + 200, mock_raw=True + ), + ), time_machine.travel(self.mock_date, tick=False): + call_command("cl_send_recap_alerts", testing_mode=True) + + # A new alert should be triggered, containing rd_3 document nested below + # its parent docket. html_content = self.get_html_content_from_email(mail.outbox[3]) self.assertEqual( len(mail.outbox), 4, msg="Outgoing emails don't match." ) self._confirm_number_of_alerts(html_content, 1) + self._count_alert_hits_and_child_hits( + html_content, + cross_object_alert_d_or_rd_field_text_query_and.name, + 1, + docket.case_name, + 1, + ) self._assert_child_hits_content( html_content, - cross_object_alert_d_or_rd_field_same_case.name, - self.de.docket.case_name, - [self.rd.description], + cross_object_alert_d_or_rd_field_text_query_and.name, + docket.case_name, + [rd_3.description], ) docket.delete() From b56f2354e43c74267811fd1f1567b2e6ae2cdb7d Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Thu, 25 Jul 2024 14:51:55 -0500 Subject: [PATCH 20/33] fix(alerts): Restore send_es_search_alert_webhook to avoid conflicts due to scheduled task - This can be removed after tasks in the queue have been processed. 
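A sketch of the queue-compatibility pattern this patch applies: Celery
resolves queued messages by task name, so the old name must stay registered
until in-flight messages drain. The delegation below is one illustrative
shape; the patch itself simply restores the old task body verbatim.

    from celery import Celery

    app = Celery("cl")

    @app.task(name="cl.api.tasks.send_es_search_alert_webhook")
    def send_es_search_alert_webhook(*args, **kwargs):
        # Messages enqueued before the rename still resolve to this name.
        return send_search_alert_webhook_es(*args, **kwargs)

    @app.task()
    def send_search_alert_webhook_es(results, webhook_pk, alert_pk):
        ...  # new implementation; new callers enqueue this task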
--- .../commands/cl_send_recap_alerts.py | 4 +- cl/alerts/tasks.py | 4 +- cl/api/tasks.py | 42 +++++++++++++++++++ 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index 7f05ce4291..193823cb22 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -25,7 +25,7 @@ recap_document_hl_matched, ) from cl.api.models import WebhookEventType -from cl.api.tasks import send_es_search_alert_webhook +from cl.api.tasks import send_search_alert_webhook_es from cl.lib.command_utils import VerboseCommand, logger from cl.lib.date_time import dt_as_local_date from cl.lib.elasticsearch_utils import do_es_sweep_alert_query @@ -543,7 +543,7 @@ def send_search_alert_webhooks( event_type=WebhookEventType.SEARCH_ALERT, enabled=True ) for user_webhook in user_webhooks: - send_es_search_alert_webhook.delay( + send_search_alert_webhook_es.delay( results_to_send, user_webhook.pk, alert_id ) diff --git a/cl/alerts/tasks.py b/cl/alerts/tasks.py index f7b004bc54..e3aad0a071 100644 --- a/cl/alerts/tasks.py +++ b/cl/alerts/tasks.py @@ -25,7 +25,7 @@ from cl.api.models import WebhookEventType from cl.api.tasks import ( send_docket_alert_webhook_events, - send_es_search_alert_webhook, + send_search_alert_webhook_es, ) from cl.celery_init import app from cl.custom_filters.templatetags.text_filters import best_case_name @@ -458,7 +458,7 @@ def send_webhook_alert_hits( event_type=WebhookEventType.SEARCH_ALERT, enabled=True ) for user_webhook in user_webhooks: - send_es_search_alert_webhook.delay( + send_search_alert_webhook_es.delay( documents, user_webhook.pk, alert.pk, diff --git a/cl/api/tasks.py b/cl/api/tasks.py index a0d6112444..7f0b8d2cdd 100644 --- a/cl/api/tasks.py +++ b/cl/api/tasks.py @@ -82,8 +82,50 @@ def send_docket_alert_webhook_events( send_webhook_event(webhook_event, json_bytes) +# TODO: Remove after scheduled OA alerts have been processed. @app.task() def send_es_search_alert_webhook( + results: list[dict[str, Any]], + webhook_pk: int, + alert: Alert, +) -> None: + """Send a search alert webhook event containing search results from a + search alert object. + + :param results: The search results returned by SOLR for this alert. + :param webhook_pk: The webhook endpoint ID object to send the event to. + :param alert: The search alert object. 
+ """ + + webhook = Webhook.objects.get(pk=webhook_pk) + serialized_alert = SearchAlertSerializerModel(alert).data + es_results = [] + for result in results: + result["snippet"] = result["text"] + es_results.append(ResultObject(initial=result)) + serialized_results = V3OAESResultSerializer(es_results, many=True).data + + post_content = { + "webhook": generate_webhook_key_content(webhook), + "payload": { + "results": serialized_results, + "alert": serialized_alert, + }, + } + renderer = JSONRenderer() + json_bytes = renderer.render( + post_content, + accepted_media_type="application/json;", + ) + webhook_event = WebhookEvent.objects.create( + webhook=webhook, + content=post_content, + ) + send_webhook_event(webhook_event, json_bytes) + + +@app.task() +def send_search_alert_webhook_es( results: list[dict[str, Any]] | list[Hit], webhook_pk: int, alert_pk: int, From d1026640871d3bcca5774cca102dc466c905c406 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Mon, 29 Jul 2024 10:13:13 -0500 Subject: [PATCH 21/33] fix(alerts): Fixed MLY alerts test can't be sent after the 28th --- cl/alerts/tests/tests_recap_alerts.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index a661347975..3a067858d2 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -1608,7 +1608,9 @@ def test_schedule_wly_and_mly_recap_alerts(self) -> None: self.assertIn(self.rd.description, txt_email) # Send scheduled Monthly alerts and check assertions. - call_command("cl_send_scheduled_alerts", rate=Alert.MONTHLY) + current_date = now().replace(day=28, hour=0) + with time_machine.travel(current_date, tick=False): + call_command("cl_send_scheduled_alerts", rate=Alert.MONTHLY) self.assertEqual( len(mail.outbox), 2, msg="Outgoing emails don't match." ) From 57b6df7ac2e46514ca729f4d528ec7a46149fe82 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Thu, 26 Sep 2024 16:55:26 -0600 Subject: [PATCH 22/33] fix(alerts): Fixed merge conflicts and adjust test accordingly new RECAP_CHILD_HITS_PER_RESULT value --- cl/alerts/tests/tests_recap_alerts.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index 3a067858d2..b339a10ef7 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -1247,15 +1247,15 @@ def test_limit_alert_case_child_hits(self) -> None: description="MOTION for Leave to File Amicus Curiae Lorem Served", ) rd_descriptions = [] - for i in range(6): + for i in range(4): rd = RECAPDocumentFactory( docket_entry=alert_de, description=f"Motion to File {i+1}", document_number=f"{i+1}", pacer_doc_id=f"018036652436{i+1}", ) - if i < 5: - # Omit the last alert to compare. Only up to 5 should be + if i < 3: + # Omit the last alert to compare. Only up to 3 should be # included in the case. rd_descriptions.append(rd.description) @@ -1286,13 +1286,13 @@ def test_limit_alert_case_child_hits(self) -> None: html_content = self.get_html_content_from_email(mail.outbox[0]) self.assertIn(recap_only_alert.name, html_content) self._confirm_number_of_alerts(html_content, 1) - # The case alert should contain up to 5 child hits. + # The case alert should contain up to 3 child hits. 
self._count_alert_hits_and_child_hits( html_content, recap_only_alert.name, 1, self.de.docket.case_name, - 5, + 3, ) self._assert_child_hits_content( html_content, From b35ef0aad49b230ad98735c4631986f7facd0069 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Thu, 26 Sep 2024 18:36:31 -0600 Subject: [PATCH 23/33] fix(elasticsearch): Fixed failing test due to build_full_join_es_queries returning values change --- cl/lib/elasticsearch_utils.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 34aceeeac8..df17f5e1dc 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -1197,15 +1197,21 @@ def build_es_base_query( mlt_query = async_to_sync(build_more_like_this_query)( cluster_pks ) - main_query, join_query = build_full_join_es_queries( - cd, - {"opinion": []}, - [], - mlt_query, - child_highlighting=False, - api_version=api_version, + main_query, child_docs_query, parent_query = ( + build_full_join_es_queries( + cd, + {"opinion": []}, + [], + mlt_query, + child_highlighting=False, + api_version=api_version, + ) + ) + return ( + search_query.query(main_query), + child_docs_query, + parent_query, ) - return search_query.query(main_query), join_query opinion_search_fields = SEARCH_OPINION_QUERY_FIELDS child_fields = opinion_search_fields.copy() From 8902aa0c226c2aac1cc2ce82fe0c76205569cdde Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Thu, 26 Sep 2024 18:46:29 -0600 Subject: [PATCH 24/33] fix(alerts): Removed recap_document_hl_matched as we no longer rely on HL to filter out RD hits --- .../commands/cl_send_recap_alerts.py | 22 +++--- cl/alerts/tests/tests_recap_alerts.py | 68 ------------------- cl/alerts/utils.py | 22 ------ 3 files changed, 12 insertions(+), 100 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index 193823cb22..8640ddc70a 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -22,7 +22,6 @@ add_document_hit_to_alert_set, alert_hits_limit_reached, has_document_alert_hit_been_triggered, - recap_document_hl_matched, ) from cl.api.models import WebhookEventType from cl.api.tasks import send_search_alert_webhook_es @@ -407,7 +406,7 @@ def filter_rd_alert_hits( alert_id: int, rd_hits: AttrList, rd_ids: list[int], - check_rd_hl=False, + check_rd_matched=False, ): """Filter RECAP document hits based on specified conditions. @@ -416,8 +415,8 @@ def filter_rd_alert_hits( :param rd_hits: A list of RECAPDocument hits to be processed. :param rd_ids: A list of RECAPDocument IDs that matched the RECAPDocument only query. - :param check_rd_hl: A boolean indicating whether to check if the RECAP - document hit matched RD HLs. + :param check_rd_matched: A boolean indicating whether to check if the RECAP + document hit from the main query also matches the RECAPDocument-only query :return: A list of RECAP document hits that meet all specified conditions. """ @@ -428,11 +427,10 @@ def filter_rd_alert_hits( r, alert_id, "r", rd_hit["_source"]["id"] ) ] - if check_rd_hl: - if not recap_document_hl_matched(rd_hit): - # If the RECAPDocument hit didn't match any HL. Check if it should be included - # due to it matched the RECAPDocument only query. 
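The membership check that replaces the old highlight heuristic is plain set
filtering. A toy version, with hit shapes mimicking the ES response:

    rd_ids = {101, 102}  # IDs returned by the RECAPDocument-only query
    child_hits = [{"_source": {"id": 101}}, {"_source": {"id": 205}}]
    # Keep a main-query child hit only if the RD-only query also matched it.
    rds_to_send = [h for h in child_hits if h["_source"]["id"] in rd_ids]
    assert [h["_source"]["id"] for h in rds_to_send] == [101]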
- conditions.append(rd_hit["_source"]["id"] in rd_ids) + if check_rd_matched: + # Add condition to check if the RD hit is within the RD IDS returned + # by the RECAPDocument-only query. + conditions.append(rd_hit["_source"]["id"] in rd_ids) if all(conditions): rds_to_send.append(rd_hit) add_document_hit_to_alert_set( @@ -495,7 +493,11 @@ def process_alert_hits( if hit.docket_id in docket_ids: # Possible Docket-only alert rds_to_send = filter_rd_alert_hits( - r, alert_id, hit["child_docs"], rd_ids, check_rd_hl=True + r, + alert_id, + hit["child_docs"], + rd_ids, + check_rd_matched=True, ) if rds_to_send: # Docket OR RECAPDocument alert. diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index b339a10ef7..8e57da973a 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -17,7 +17,6 @@ index_daily_recap_documents, ) from cl.alerts.models import SEARCH_TYPES, Alert, ScheduledAlertHit -from cl.alerts.utils import recap_document_hl_matched from cl.api.factories import WebhookFactory from cl.api.models import WebhookEvent, WebhookEventType from cl.donate.models import NeonMembership @@ -241,73 +240,6 @@ def _count_webhook_hits_and_child_hits( % case_title, ) - async def test_recap_document_hl_matched(self) -> None: - """Test recap_document_hl_matched method that determines weather a hit - contains RECAPDocument HL fields.""" - - # Index base document factories. - with time_machine.travel(self.mock_date, tick=False): - index_daily_recap_documents( - self.r, - DocketDocument._index._name, - RECAPSweepDocument, - testing=True, - ) - - # Docket-only query - search_params = { - "type": SEARCH_TYPES.RECAP, - "q": '"401 Civil"', - } - search_query = RECAPSweepDocument.search() - results, parent_results, _ = await sync_to_async( - do_es_sweep_alert_query - )( - search_query, - search_query, - search_params, - ) - docket_result = results[0] - for rd in docket_result["child_docs"]: - rd_field_matched = recap_document_hl_matched(rd) - self.assertEqual(rd_field_matched, False) - - # RECAPDocument-only query - search_params = { - "type": SEARCH_TYPES.RECAP, - "q": '"Mauris iaculis, leo sit amet hendrerit vehicula"', - } - search_query = RECAPSweepDocument.search() - results, parent_results, _ = await sync_to_async( - do_es_sweep_alert_query - )( - search_query, - search_query, - search_params, - ) - docket_result = results[0] - for rd in docket_result["child_docs"]: - rd_field_matched = recap_document_hl_matched(rd) - self.assertEqual(rd_field_matched, True) - - # Cross-object query - search_params = { - "type": SEARCH_TYPES.RECAP, - "q": "SUBPOENAS SERVED OFF Mauris iaculis", - } - search_query = RECAPSweepDocument.search() - results, parent_results, _ = await sync_to_async( - do_es_sweep_alert_query - )( - search_query, - search_query, - search_params, - ) - docket_result = results[0] - for rd in docket_result["child_docs"]: - rd_field_matched = recap_document_hl_matched(rd) - self.assertEqual(rd_field_matched, True) - def test_filter_recap_alerts_to_send(self) -> None: """Test filter RECAP alerts that met the conditions to be sent: - RECAP type alert. diff --git a/cl/alerts/utils.py b/cl/alerts/utils.py index 6ad589a913..d98984abff 100644 --- a/cl/alerts/utils.py +++ b/cl/alerts/utils.py @@ -147,28 +147,6 @@ def alert_hits_limit_reached(alert_pk: int, user_pk: int) -> bool: return False -def recap_document_hl_matched(rd_hit: Hit) -> bool: - """Determine whether HL matched a RECAPDocument text field. 
- - :param rd_hit: The ES hit. - :return: True if the hit matched a RECAPDocument field. Otherwise, False. - """ - - matched_rd_hl: set[str] = set() - rd_hl_fields = set(SEARCH_RECAP_CHILD_HL_FIELDS.keys()) - if hasattr(rd_hit, "highlight"): - highlights = rd_hit.highlight.to_dict() - matched_rd_hl.update( - hl_key - for hl_key, hl_value in highlights.items() - for hl in hl_value - if f"<{ALERTS_HL_TAG}>" in hl - ) - if matched_rd_hl and matched_rd_hl.issubset(rd_hl_fields): - return True - return False - - def make_alert_set_key(alert_id: int, document_type: str) -> str: """Generate a Redis key for storing alert hits. From 4babf5d3b2d0e1ec45b9604e8f532f1345ebc526 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 18 Oct 2024 13:26:43 -0400 Subject: [PATCH 25/33] feat(custom filter): Refactor alerts_supported method for better readability --- cl/custom_filters/templatetags/extras.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/cl/custom_filters/templatetags/extras.py b/cl/custom_filters/templatetags/extras.py index 90395d9356..39d535b2df 100644 --- a/cl/custom_filters/templatetags/extras.py +++ b/cl/custom_filters/templatetags/extras.py @@ -291,14 +291,9 @@ def alerts_supported(context: RequestContext, search_type: str) -> str: """ request = context["request"] - return ( - search_type == SEARCH_TYPES.OPINION - or search_type == SEARCH_TYPES.ORAL_ARGUMENT - or ( - search_type == SEARCH_TYPES.RECAP - and waffle.flag_is_active(request, "recap-alerts-active") - ) - ) + if search_type == SEARCH_TYPES.RECAP: + return waffle.flag_is_active(request, "recap-alerts-active") + return search_type in (SEARCH_TYPES.OPINION, SEARCH_TYPES.ORAL_ARGUMENT) @register.filter From a0085cce32242d78e8aed8db537bbb52c3517404 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 18 Oct 2024 13:33:35 -0400 Subject: [PATCH 26/33] refactor(alerts): Cleaned up unused imports in utils.py --- cl/alerts/utils.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cl/alerts/utils.py b/cl/alerts/utils.py index d98984abff..66047e5fe9 100644 --- a/cl/alerts/utils.py +++ b/cl/alerts/utils.py @@ -15,13 +15,6 @@ ) from cl.lib.command_utils import logger from cl.lib.elasticsearch_utils import add_es_highlighting -from cl.lib.types import CleanData -from cl.search.constants import ( - ALERTS_HL_TAG, - SEARCH_RECAP_CHILD_HL_FIELDS, - recap_document_filters, - recap_document_indexed_fields, -) from cl.search.documents import AudioPercolator from cl.search.models import SEARCH_TYPES, Docket From 5f12c3037c3a3110fde28763fff6965b4090264f Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 18 Oct 2024 13:36:08 -0400 Subject: [PATCH 27/33] refactor(search): Cleanup unused constants --- cl/search/constants.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/cl/search/constants.py b/cl/search/constants.py index 0efe9848bb..333dfbca6c 100644 --- a/cl/search/constants.py +++ b/cl/search/constants.py @@ -306,29 +306,6 @@ Opinion.TRIAL_COURT: "trial-court-document", } -recap_document_indexed_fields = [ - "id", - "docket_entry_id", - "description", - "entry_number", - "entry_date_filed", - "short_description", - "document_type", - "document_number", - "pacer_doc_id", - "plain_text", - "attachment_number", - "is_available", - "page_count", - "cites", -] - -recap_document_filters = [ - "available_only", - "description", - "document_number", - "attachment_number", -] cardinality_query_unique_ids = { SEARCH_TYPES.RECAP: "docket_id", From 
5fb177f83e8db2743a4d4eb6ab62acb82319597c Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 18 Oct 2024 13:41:50 -0400 Subject: [PATCH 28/33] refactor(alerts): Replaces Type import with built-in alternative --- cl/alerts/management/commands/cl_send_recap_alerts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index 8640ddc70a..9589a7e8a6 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -2,7 +2,7 @@ import datetime import time import traceback -from typing import Any, Literal, Type +from typing import Any, Literal import pytz from asgiref.sync import async_to_sync @@ -120,7 +120,7 @@ def retrieve_task_info(task_info: dict[str, Any]) -> dict[str, Any]: def index_daily_recap_documents( r: Redis, source_index_name: str, - target_index: Type[RECAPSweepDocument] | Type[ESRECAPSweepDocument], + target_index: type[RECAPSweepDocument] | type[ESRECAPSweepDocument], testing: bool = False, only_rd: bool = False, ) -> int: From 78955f1817110635af55690533fa1ffff2458968 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 18 Oct 2024 13:49:04 -0400 Subject: [PATCH 29/33] refactor(search): Removes unused argument from index command --- .../management/commands/cl_index_parent_and_child_docs.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cl/search/management/commands/cl_index_parent_and_child_docs.py b/cl/search/management/commands/cl_index_parent_and_child_docs.py index 57cdf390fc..366d9fe13e 100644 --- a/cl/search/management/commands/cl_index_parent_and_child_docs.py +++ b/cl/search/management/commands/cl_index_parent_and_child_docs.py @@ -342,11 +342,6 @@ def add_arguments(self, parser): action="store_true", help="Use this flag to only index documents missing in the index.", ) - parser.add_argument( - "--sweep-index", - action="store_true", - help="Whether to perform an indexing for the sweep index.", - ) def handle(self, *args, **options): super().handle(*args, **options) From da72292caa66abaff484a6eecce13c71729ce2d4 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 18 Oct 2024 14:03:55 -0400 Subject: [PATCH 30/33] feat(alert): Implements early returns in recap alert command --- .../commands/cl_send_recap_alerts.py | 76 ++++++++++--------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index 9589a7e8a6..6b6ce13f29 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -588,21 +588,22 @@ def query_and_send_alerts( results_to_send = process_alert_hits( r, results, parent_results, child_results, alert.pk, query_date ) - if results_to_send: - hits.append( - [ - alert, - search_type, - results_to_send, - len(results_to_send), - ] - ) - alert.query_run = search_params.urlencode() # type: ignore - alert.date_last_hit = timezone.now() - alert.save() + if not results_to_send: + continue + hits.append( + [ + alert, + search_type, + results_to_send, + len(results_to_send), + ] + ) + alert.query_run = search_params.urlencode() # type: ignore + alert.date_last_hit = timezone.now() + alert.save() - # Send webhooks - send_search_alert_webhooks(user, results_to_send, alert.pk) + # Send webhooks + send_search_alert_webhooks(user, results_to_send, alert.pk) if hits: 
send_search_alert_emails.delay([(user.pk, hits)]) @@ -644,31 +645,32 @@ def query_and_schedule_alerts( results_to_send = process_alert_hits( r, results, parent_results, child_results, alert.pk, query_date ) - if results_to_send: - for hit in results_to_send: - # Schedule DAILY, WEEKLY and MONTHLY Alerts - if alert_hits_limit_reached(alert.pk, user.pk): - # Skip storing hits for this alert-user combination because - # the SCHEDULED_ALERT_HITS_LIMIT has been reached. - continue - - child_result_objects = [] - hit_copy = copy.deepcopy(hit) - if hasattr(hit_copy, "child_docs"): - for child_doc in hit_copy.child_docs: - child_result_objects.append( - child_doc["_source"].to_dict() - ) - hit_copy["child_docs"] = child_result_objects - scheduled_hits_to_create.append( - ScheduledAlertHit( - user=user, - alert=alert, - document_content=hit_copy.to_dict(), + if not results_to_send: + continue + for hit in results_to_send: + # Schedule DAILY, WEEKLY and MONTHLY Alerts + if alert_hits_limit_reached(alert.pk, user.pk): + # Skip storing hits for this alert-user combination because + # the SCHEDULED_ALERT_HITS_LIMIT has been reached. + continue + + child_result_objects = [] + hit_copy = copy.deepcopy(hit) + if hasattr(hit_copy, "child_docs"): + for child_doc in hit_copy.child_docs: + child_result_objects.append( + child_doc["_source"].to_dict() ) + hit_copy["child_docs"] = child_result_objects + scheduled_hits_to_create.append( + ScheduledAlertHit( + user=user, + alert=alert, + document_content=hit_copy.to_dict(), ) - # Send webhooks - send_search_alert_webhooks(user, results_to_send, alert.pk) + ) + # Send webhooks + send_search_alert_webhooks(user, results_to_send, alert.pk) # Create scheduled WEEKLY and MONTHLY Alerts in bulk. if scheduled_hits_to_create: From 0b62dca8f3d2eafaea996bd8ff66e0bee1baf198 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 18 Oct 2024 15:22:32 -0400 Subject: [PATCH 31/33] feat(alerts): Adds TaskCompletionStatus dataclass for tracking task progress This commit introduces a new dataclass to store and manage information related to running Elasticsearch tasks. The dataclass includes properties for task completion status, created and total document counts. --- .../commands/cl_send_recap_alerts.py | 78 +++++++++---------- cl/alerts/utils.py | 8 ++ 2 files changed, 45 insertions(+), 41 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index 6b6ce13f29..65d88bf7b6 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -19,6 +19,7 @@ from cl.alerts.models import Alert, ScheduledAlertHit from cl.alerts.tasks import send_search_alert_emails from cl.alerts.utils import ( + TaskCompletionStatus, add_document_hit_to_alert_set, alert_hits_limit_reached, has_document_alert_hit_been_triggered, @@ -64,27 +65,34 @@ def get_task_status(task_id: str, es: Elasticsearch) -> dict[str, Any]: def compute_estimated_remaining_time( - initial_wait: float, start_time_millis: int, created: int, total: int + initial_wait: float, task_status: TaskCompletionStatus ) -> float: """Compute the estimated remaining time for the re_index task to complete. :param initial_wait: The default wait time in seconds. - :param start_time_millis: The start time in milliseconds epoch. - :param created: The number of items created so far. - :param total: The total number of items to be created. 
+ :param task_status: An instance of `TaskCompletionStatus` containing task + information. :return: The estimated remaining time in seconds. If the start time, created, or total are invalid, the initial default time is returned. """ - if start_time_millis is None or not created or not total: + if ( + task_status.start_time_millis is None + or not task_status.created + or not task_status.total + ): return initial_wait - start_time = datetime.datetime.fromtimestamp(start_time_millis / 1000.0) + start_time = datetime.datetime.fromtimestamp( + task_status.start_time_millis / 1000.0 + ) time_now = datetime.datetime.now() estimated_time_remaining = max( datetime.timedelta( - seconds=((time_now - start_time).total_seconds() / created) - * (total - created) + seconds=( + (time_now - start_time).total_seconds() / task_status.created + ) + * (task_status.total - task_status.created) ).total_seconds(), initial_wait, ) @@ -92,29 +100,23 @@ def compute_estimated_remaining_time( return estimated_time_remaining -def retrieve_task_info(task_info: dict[str, Any]) -> dict[str, Any]: +def retrieve_task_info(task_info: dict[str, Any]) -> TaskCompletionStatus: """Retrieve task information from the given task dict. :param task_info: A dictionary containing the task status information. - :return: A dictionary with the task completion status, created documents - count, total documents count, and the task start time in milliseconds. - Retrieve default values in case task_info is not valid. + :return: A `TaskCompletionStatus` object representing the extracted task + information. """ if task_info: status = task_info["task"]["status"] - return { - "completed": task_info["completed"], - "created": status["created"], - "total": status["total"], - "start_time_millis": task_info["task"]["start_time_in_millis"], - } - return { - "completed": False, - "created": 0, - "total": 0, - "start_time_millis": None, - } + return TaskCompletionStatus( + completed=task_info["completed"], + created=status["created"], + total=status["total"], + start_time_millis=task_info["task"]["start_time_in_millis"], + ) + return TaskCompletionStatus() def index_daily_recap_documents( @@ -338,41 +340,35 @@ def fields = [ initial_wait = 0.01 if testing else 60.0 time.sleep(initial_wait) - get_task_info = retrieve_task_info(get_task_status(task_id, es)) + task_info = retrieve_task_info(get_task_status(task_id, es)) iterations_count = 0 estimated_time_remaining = compute_estimated_remaining_time( - initial_wait, - get_task_info["start_time_millis"], - get_task_info["created"], - get_task_info["total"], + initial_wait, task_info ) - while not get_task_info["completed"]: + while not task_info.completed: logger.info( - f"Task progress: {get_task_info['created']}/{get_task_info['total']} documents. " + f"Task progress: {task_info.created}/{task_info.total} documents. " f"Estimated time to finish: {estimated_time_remaining} seconds." 
         )
-        task_info = get_task_status(task_id, es)
-        get_task_info = retrieve_task_info(task_info)
+        task_status = get_task_status(task_id, es)
+        task_info = retrieve_task_info(task_status)
         time.sleep(estimated_time_remaining)
-        if task_info and not get_task_info["completed"]:
+        if task_info and not task_info.completed:
             estimated_time_remaining = compute_estimated_remaining_time(
-                initial_wait,
-                get_task_info["start_time_millis"],
-                get_task_info["created"],
-                get_task_info["total"],
+                initial_wait, task_info
             )
         if not task_info:
             iterations_count += 1
             if iterations_count > 10:
                 logger.error(
                     "Re_index alert sweep index task has failed: %s/%s",
-                    get_task_info["created"],
-                    get_task_info["total"],
+                    task_info.created,
+                    task_info.total,
                 )
                 break
 
     r.delete("alert_sweep:task_id")
-    return get_task_info["total"]
+    return task_info.total
 
 
 def should_docket_hit_be_included(
diff --git a/cl/alerts/utils.py b/cl/alerts/utils.py
index 66047e5fe9..4b97fc7ba8 100644
--- a/cl/alerts/utils.py
+++ b/cl/alerts/utils.py
@@ -25,6 +25,14 @@ class DocketAlertReportObject:
     docket: Docket
 
 
+@dataclass
+class TaskCompletionStatus:
+    completed: bool = False
+    created: int = 0
+    total: int = 0
+    start_time_millis: int | None = None
+
+
 class OldAlertReport:
     def __init__(self):
         self.old_alerts = []

From 3b153d2e4a02bbc22f13c017ea56877ad8b60fbe Mon Sep 17 00:00:00 2001
From: Eduardo Rosendo
Date: Fri, 18 Oct 2024 16:24:47 -0400
Subject: [PATCH 32/33] feat(lib): Introduces EsMainQueries Dataclass

Adds a new dataclass to encapsulate common Elasticsearch queries used
throughout the codebase. This centralizes query definitions, making it
easier to maintain and update them.

Updates the `build_es_base_query` function to return an instance of
`EsMainQueries` instead of a tuple. This ensures consistent query
structure and simplifies future modifications.
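
For reviewers, a minimal sketch of the new calling convention (the
`EsMainQueries` fields match the cl/lib/types.py hunk below; the
cleaned-data dict is illustrative only, not a prescribed input):

    from cl.lib.elasticsearch_utils import build_es_base_query
    from cl.search.documents import DocketDocument
    from cl.search.models import SEARCH_TYPES

    # Illustrative cleaned form data; any validated search form data
    # flows through build_es_base_query the same way.
    cd = {"type": SEARCH_TYPES.RECAP, "q": "securities fraud"}

    es_queries = build_es_base_query(DocketDocument.search(), cd)
    main_search = es_queries.search_query   # main ES Search object, always set
    parent_query = es_queries.parent_query  # QueryString | None
    child_query = es_queries.child_query    # QueryString | None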
--- .../commands/cl_send_recap_alerts.py | 5 +- .../commands/clean_up_search_alerts.py | 3 +- .../commands/ready_mix_cases_project.py | 3 +- cl/lib/elasticsearch_utils.py | 63 ++++++++++++------- cl/lib/types.py | 9 +++ cl/search/documents.py | 4 +- cl/search/tests/tests_es_oral_arguments.py | 5 +- cl/search/tests/tests_es_person.py | 6 +- 8 files changed, 65 insertions(+), 33 deletions(-) diff --git a/cl/alerts/management/commands/cl_send_recap_alerts.py b/cl/alerts/management/commands/cl_send_recap_alerts.py index 65d88bf7b6..a38ff91f8c 100644 --- a/cl/alerts/management/commands/cl_send_recap_alerts.py +++ b/cl/alerts/management/commands/cl_send_recap_alerts.py @@ -564,9 +564,8 @@ def query_and_send_alerts( alerts_sent_count = 0 now_time = datetime.datetime.now() for user in alert_users: - if rate == Alert.REAL_TIME: - if not user.profile.is_member: - continue + if rate == Alert.REAL_TIME and not user.profile.is_member: + continue alerts = user.alerts.filter(rate=rate, alert_type=SEARCH_TYPES.RECAP) logger.info(f"Running alerts for user '{user}': {alerts}") diff --git a/cl/alerts/management/commands/clean_up_search_alerts.py b/cl/alerts/management/commands/clean_up_search_alerts.py index cf1ceb2f54..df8a28b2d6 100644 --- a/cl/alerts/management/commands/clean_up_search_alerts.py +++ b/cl/alerts/management/commands/clean_up_search_alerts.py @@ -75,7 +75,8 @@ def validate_queries_syntax(options: OptionsType) -> None: if search_form.is_valid(): cd = search_form.cleaned_data try: - s, _, _ = build_es_base_query(search_query, cd) + es_queries = build_es_base_query(search_query, cd) + s = es_queries.search_query s = s.extra(size=0) s.execute().to_dict() # Waiting between requests to avoid hammering ES too quickly. diff --git a/cl/corpus_importer/management/commands/ready_mix_cases_project.py b/cl/corpus_importer/management/commands/ready_mix_cases_project.py index eabb93f4a4..32c9db7ae3 100644 --- a/cl/corpus_importer/management/commands/ready_mix_cases_project.py +++ b/cl/corpus_importer/management/commands/ready_mix_cases_project.py @@ -198,7 +198,8 @@ def query_results_in_es(options): } search_query = DocketDocument.search() - s, _ = build_es_base_query(search_query, cd) + es_queries = build_es_base_query(search_query, cd) + s = es_queries.search_query s = s.extra(size=options["results_size"]) response = s.execute().to_dict() extracted_data = [ diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index d0d8db41b1..c12b9c0058 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -35,6 +35,7 @@ ApiPositionMapping, BasePositionMapping, CleanData, + EsMainQueries, ESRangeQueryParams, ) from cl.lib.utils import ( @@ -1114,18 +1115,20 @@ def build_es_base_query( child_highlighting: bool = True, api_version: Literal["v3", "v4"] | None = None, alerts: bool = False, -) -> tuple[Search, QueryString | None, QueryString | None]: +) -> EsMainQueries: """Builds filters and fulltext_query based on the given cleaned data and returns an elasticsearch query. :param search_query: The Elasticsearch search query object. :param cd: The cleaned data object containing the query and filters. - :param child_highlighting: Whether highlighting should be enabled in child docs. + :param child_highlighting: Whether highlighting should be enabled in child + docs. :param api_version: Optional, the request API version. :param alerts: If highlighting is being applied to search Alerts hits. 
-    :return: A three-tuple, the Elasticsearch search query object and an ES
-    QueryString for child documents or None if there is no need to query
-    child documents and a QueryString for parent documents or None.
+    :return: An `EsMainQueries` object containing the main Elasticsearch
+    search query object, a QueryString for parent documents (or None), and
+    a QueryString for child documents (or None when there is no need to
+    query child documents).
     """
 
     main_query = None
@@ -1244,10 +1247,10 @@ def build_es_base_query(
                     api_version=api_version,
                 )
             )
-            return (
-                search_query.query(main_query),
-                child_docs_query,
-                parent_query,
+            return EsMainQueries(
+                search_query=search_query.query(main_query),
+                parent_query=parent_query,
+                child_query=child_docs_query,
             )
 
         opinion_search_fields = SEARCH_OPINION_QUERY_FIELDS
@@ -1291,7 +1294,11 @@ def build_es_base_query(
         match_all_query = get_match_all_query(
             cd, search_query, api_version, child_highlighting
         )
-        return match_all_query, child_docs_query, parent_query
+        return EsMainQueries(
+            search_query=match_all_query,
+            parent_query=parent_query,
+            child_query=child_docs_query,
+        )
 
     if plain_doc:
         # Combine the filters and string query for plain documents like Oral
@@ -1300,7 +1307,11 @@ def build_es_base_query(
             cd, filters, string_query, api_version
         )
 
-    return search_query.query(main_query), child_docs_query, parent_query
+    return EsMainQueries(
+        search_query=search_query.query(main_query),
+        parent_query=parent_query,
+        child_query=child_docs_query,
+    )
 
 
 def build_has_parent_parties_query(
@@ -1442,7 +1453,8 @@ def get_facet_dict_for_search_query(
     """
     cd["just_facets_query"] = True
-    search_query, _, _ = build_es_base_query(search_query, cd)
+    es_queries = build_es_base_query(search_query, cd)
+    search_query = es_queries.search_query
     search_query.aggs.bucket("status", A("terms", field="status.raw"))
     search_query = search_query.extra(size=0)
    response = search_query.execute()
@@ -1464,7 +1476,9 @@ def build_es_main_query(
     applicable.
""" search_query_base = search_query - search_query, child_docs_query, _ = build_es_base_query(search_query, cd) + es_queries = build_es_base_query(search_query, cd) + search_query = es_queries.search_query + child_docs_query = es_queries.child_query top_hits_limit = 5 child_docs_count_query = None match cd["type"]: @@ -2390,7 +2404,9 @@ def build_search_feed_query( hl_field = "text" if cd["type"] == SEARCH_TYPES.RECAP: hl_field = "plain_text" - s, child_docs_query, _ = build_es_base_query(search_query, cd) + es_queries = build_es_base_query(search_query, cd) + s = es_queries.search_query + child_docs_query = es_queries.child_query if jurisdiction or cd["type"] == SEARCH_TYPES.RECAP: # An Opinion Jurisdiction feed or RECAP Search displays child documents # Eliminate items that lack the ordering field and apply highlighting @@ -2952,9 +2968,11 @@ def do_es_api_query( """ try: - s, child_docs_query, _ = build_es_base_query( + es_queries = build_es_base_query( search_query, cd, cd["highlight"], api_version ) + s = es_queries.search_query + child_docs_query = es_queries.child_query except ( UnbalancedParenthesesQuery, UnbalancedQuotesQuery, @@ -3122,8 +3140,8 @@ def do_es_alert_estimation_query( days=int(day_count) ) cd[before_field] = None - estimation_query, _, _ = build_es_base_query(search_query, cd) - + es_queries = build_es_base_query(search_query, cd) + estimation_query = es_queries.search_query if cd["type"] == SEARCH_TYPES.RECAP: # The RECAP estimation query consists of two requests: one to estimate # Docket hits and one to estimate RECAPDocument hits. @@ -3144,7 +3162,8 @@ def do_es_alert_estimation_query( multi_search = multi_search.add(main_doc_count_query) # Build RECAPDocuments count query. - _, child_docs_query, _ = build_es_base_query(search_query, cd) + es_queries = build_es_base_query(search_query, cd) + child_docs_query = es_queries.child_query child_docs_count_query = build_child_docs_query(child_docs_query, cd) child_total = 0 if child_docs_count_query: @@ -3186,10 +3205,10 @@ def do_es_sweep_alert_query( cd = search_form.cleaned_data else: return None, None, None - - s, child_query, parent_query = build_es_base_query( - search_query, cd, True, alerts=True - ) + es_queries = build_es_base_query(search_query, cd, True, alerts=True) + s = es_queries.search_query + parent_query = es_queries.parent_query + child_query = es_queries.child_query main_query = add_es_highlighting(s, cd, alerts=True) main_query = main_query.sort(build_sort_results(cd)) main_query = main_query.extra( diff --git a/cl/lib/types.py b/cl/lib/types.py index 82d6131b5e..ff257574e9 100644 --- a/cl/lib/types.py +++ b/cl/lib/types.py @@ -4,6 +4,8 @@ from typing import Any, Callable, Dict, List, NotRequired, TypedDict, Union from django.http import HttpRequest +from django_elasticsearch_dsl.search import Search +from elasticsearch_dsl.query import QueryString from cl.users.models import User @@ -190,6 +192,13 @@ def get_db_to_dataclass_map(self): return self.__db_to_dataclass_map +@dataclass +class EsMainQueries: + search_query: Search + parent_query: QueryString | None = None + child_query: QueryString | None = None + + @dataclass class ApiPositionMapping(BasePositionMapping): position_type_dict: defaultdict[int, list[str]] = field( diff --git a/cl/search/documents.py b/cl/search/documents.py index b62293ac90..59d8327875 100644 --- a/cl/search/documents.py +++ b/cl/search/documents.py @@ -365,8 +365,8 @@ def prepare_percolator_query(self, instance): cd = search_form.cleaned_data search_query = 
AudioDocument.search() - query, _, _ = build_es_base_query(search_query, cd) - return query.to_dict()["query"] + es_queries = build_es_base_query(search_query, cd) + return es_queries.search_query.to_dict()["query"] class ES_CHILD_ID: diff --git a/cl/search/tests/tests_es_oral_arguments.py b/cl/search/tests/tests_es_oral_arguments.py index 3ca921704d..5878299362 100644 --- a/cl/search/tests/tests_es_oral_arguments.py +++ b/cl/search/tests/tests_es_oral_arguments.py @@ -984,8 +984,9 @@ def confirm_query_matched(response, query_id) -> bool: @staticmethod def save_percolator_query(cd): search_query = AudioDocument.search() - query, _, _ = build_es_base_query(search_query, cd) - query_dict = query.to_dict()["query"] + es_queries = build_es_base_query(search_query, cd) + search_query = es_queries.search_query + query_dict = search_query.to_dict()["query"] percolator_query = AudioPercolator( percolator_query=query_dict, rate=Alert.REAL_TIME ) diff --git a/cl/search/tests/tests_es_person.py b/cl/search/tests/tests_es_person.py index 5f6a195849..12a0b7f1fb 100644 --- a/cl/search/tests/tests_es_person.py +++ b/cl/search/tests/tests_es_person.py @@ -1342,7 +1342,8 @@ def test_has_child_filters(self) -> None: "type": SEARCH_TYPES.PEOPLE, } s = PersonDocument.search() - main_query, _, _ = build_es_base_query(s, cd) + es_queries = build_es_base_query(s, cd) + main_query = es_queries.search_query self.assertEqual(main_query.count(), 2) # Query by parent field dob_state and child field selection_method. @@ -1352,7 +1353,8 @@ def test_has_child_filters(self) -> None: "type": SEARCH_TYPES.PEOPLE, } s = PersonDocument.search() - main_query, _, _ = build_es_base_query(s, cd) + es_queries = build_es_base_query(s, cd) + main_query = es_queries.search_query self.assertEqual(main_query.count(), 1) position_5.delete() From eb8874e718ae18cddd700e2e2c93e53d99c24849 Mon Sep 17 00:00:00 2001 From: ttys0dev <126845556+ttys0dev@users.noreply.github.com> Date: Sat, 5 Oct 2024 16:42:09 -0600 Subject: [PATCH 33/33] Fix check for restricted attachments --- cl/recap/mergers.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cl/recap/mergers.py b/cl/recap/mergers.py index 6eccba198e..80244e7865 100644 --- a/cl/recap/mergers.py +++ b/cl/recap/mergers.py @@ -1792,6 +1792,15 @@ async def merge_attachment_page_data( if not all(sanity_checks): continue + # Missing on some restricted docs (see Juriscraper) + # Attachment 0 may not have page count since it is the main rd. + if ( + "page_count" in attachment + and attachment["page_count"] is None + and attachment["attachment_number"] != 0 + ): + continue + # Appellate entries with attachments don't have a main RD, transform it # to an attachment. In ACMS attachment pages, all the documents use the # same pacer_doc_id, so we need to make sure only one is matched to the