Commit

Merge branch 'main' into update-cl-xml-from-cap
jtmst authored Oct 28, 2024
2 parents 931f2cd + 5ac88cd commit ebf53e8
Showing 38 changed files with 3,686 additions and 566 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/docker-build.yml
@@ -121,6 +121,11 @@ jobs:
- name: Watch cl-retry-webhooks rollout status
run: kubectl rollout status -n ${{ env.EKS_NAMESPACE }} deployment/cl-retry-webhooks

- name: Rollout cl-send-rt-percolator-alerts
run: kubectl set image -n ${{ env.EKS_NAMESPACE }} deployment/cl-send-rt-percolator-alerts cl-send-rt-percolator-alerts=freelawproject/courtlistener:${{ steps.vars.outputs.sha_short }}-prod
- name: Watch cl-send-rt-percolator-alerts rollout status
run: kubectl rollout status -n ${{ env.EKS_NAMESPACE }} deployment/cl-send-rt-percolator-alerts

- name: Rollout cl-es-sweep-indexer
run: kubectl set image -n ${{ env.EKS_NAMESPACE }} deployment/cl-es-sweep-indexer sweep-indexer=freelawproject/courtlistener:${{ steps.vars.outputs.sha_short }}-prod
- name: Watch cl-es-sweep-indexer rollout status
59 changes: 44 additions & 15 deletions .github/workflows/tests.yml
@@ -11,6 +11,39 @@ on:
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Check out CourtListener
uses: actions/checkout@v4
with:
path: courtlistener

# Build docker image
- name: Set up docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build docker image
uses: docker/build-push-action@v6
with:
context: ./courtlistener
file: ./courtlistener/docker/django/Dockerfile
push: false # This image is for testing only
tags: courtlistener:latest
outputs: type=docker,dest=/tmp/courtlistener.tar
build-args: |
BUILD_ENV=dev
cache-from: type=gha
cache-to: type=gha,mode=max

- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: courtlistener
path: /tmp/courtlistener.tar
compression-level: 0
retention-days: 1

test:
runs-on: ubuntu-latest
needs: build
strategy:
fail-fast: false
matrix:
@@ -50,30 +83,25 @@ jobs:
- name: Echo github actor name for debugging
run: echo ${{ github.actor }}

# Build and cache docker images so tests are always run on the latest
# dependencies
- name: Set up docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver-opts: network=host
- name: Prebuild docker images
uses: docker/build-push-action@v6
with:
context: ./courtlistener
file: ./courtlistener/docker/django/Dockerfile
load: true
build-args: |
BUILD_ENV=dev
cache-from: type=gha
cache-to: type=gha,mode=max

# Prepare Docker images
- name: Pull docker images
working-directory: courtlistener/docker/courtlistener
run: docker compose pull --quiet --ignore-buildable
- name: Build docker images
- name: Download courtlistener image
uses: actions/download-artifact@v4
with:
name: courtlistener
path: /tmp
- name: Load courtlistener image
run: docker load --input /tmp/courtlistener.tar
- name: Build cl-postgresql images # TODO: replace with an off-the-shelf image. Until then, build now for later steps.
working-directory: courtlistener/docker/courtlistener
run: docker compose build
run: docker compose build cl-postgresql
- name: List docker images
run: docker image ls -a --no-trunc

@@ -82,7 +110,8 @@
run: docker network create -d bridge --attachable cl_net_overlay
- name: Start docker containers
working-directory: courtlistener/docker/courtlistener
run: docker compose -f docker-compose.yml -f docker-compose.tmpfs.yml up -d --no-build --pull=never
run: > # Don't build; use the image loaded from the build job, injected by merging an overriding compose config
docker compose -f docker-compose.yml -f docker-compose.tmpfs.yml -f <(echo 'services: { cl-django: { image: "courtlistener" }, cl-celery: { image: "courtlistener" } }') up -d --no-build --pull=never
- name: List docker containers
run: docker ps -a --no-trunc
- name: Show the docker startup logs
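
Note: the inline override passed via process substitution in the "Start docker containers" step above is equivalent to a small compose override file along these lines (filename hypothetical; the service and image names come verbatim from the echoed YAML):

# docker-compose.images.yml (hypothetical name): same effect as the <(echo ...) override
services:
  cl-django:
    image: "courtlistener"
  cl-celery:
    image: "courtlistener"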
10 changes: 8 additions & 2 deletions cl/alerts/management/commands/cl_send_recap_alerts.py
@@ -7,6 +7,7 @@
import pytz
from asgiref.sync import async_to_sync
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.http import QueryDict
from django.utils import timezone
from elasticsearch import Elasticsearch
@@ -21,8 +22,8 @@
from cl.alerts.utils import (
TaskCompletionStatus,
add_document_hit_to_alert_set,
alert_hits_limit_reached,
has_document_alert_hit_been_triggered,
scheduled_alert_hits_limit_reached,
)
from cl.api.models import WebhookEventType
from cl.api.tasks import send_search_alert_webhook_es
@@ -625,6 +626,9 @@ def query_and_schedule_alerts(
"""

alert_users = User.objects.filter(alerts__rate=rate).distinct()
docket_content_type = ContentType.objects.get(
app_label="search", model="docket"
)
for user in alert_users:
alerts = user.alerts.filter(rate=rate, alert_type=SEARCH_TYPES.RECAP)
logger.info(f"Running '{rate}' alerts for user '{user}': {alerts}")
@@ -644,7 +648,7 @@
continue
for hit in results_to_send:
# Schedule DAILY, WEEKLY and MONTHLY Alerts
if alert_hits_limit_reached(alert.pk, user.pk):
if scheduled_alert_hits_limit_reached(alert.pk, user.pk):
# Skip storing hits for this alert-user combination because
# the SCHEDULED_ALERT_HITS_LIMIT has been reached.
continue
@@ -662,6 +666,8 @@
user=user,
alert=alert,
document_content=hit_copy.to_dict(),
content_type=docket_content_type,
object_id=hit_copy.docket_id,
)
)
# Send webhooks
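
The new content_type/object_id kwargs follow Django's generic-relations pattern, and resolving the Docket ContentType once before the user loop avoids a per-hit database query. A minimal sketch of the model shape this implies; the field definitions are assumptions, not shown in this diff:

from django.contrib.contenttypes.fields import GenericForeignKey
from django.contrib.contenttypes.models import ContentType
from django.db import models

class ScheduledAlertHit(models.Model):
    # Assumed fields: the kwargs above suggest a generic foreign key
    # pointing each hit at its search.Docket row.
    content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE)
    object_id = models.PositiveIntegerField()
    related_object = GenericForeignKey("content_type", "object_id")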
36 changes: 36 additions & 0 deletions cl/alerts/management/commands/cl_send_rt_percolator_alerts.py
@@ -0,0 +1,36 @@
import time

from cl.alerts.management.commands.cl_send_scheduled_alerts import (
query_and_send_alerts_by_rate,
)
from cl.alerts.models import Alert
from cl.lib.command_utils import VerboseCommand


class Command(VerboseCommand):
help = """Send real-time alerts scheduled by the Percolator every 5 minutes.
This process is performed to accumulate alerts that can be grouped into a
single email if they belong to the same user. """

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.options = {}

def add_arguments(self, parser):
parser.add_argument(
"--testing-mode",
action="store_true",
help="Use this flag for testing purposes.",
)

def handle(self, *args, **options):
super().handle(*args, **options)
testing_mode = options.get("testing_mode", False)
while True:
query_and_send_alerts_by_rate(Alert.REAL_TIME)
if testing_mode:
# Perform only 1 iteration for testing purposes.
break

# Wait for 5 minutes.
time.sleep(300)
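
Since --testing-mode breaks out of the loop after a single pass, the command can be exercised synchronously; a sketch using Django's standard call_command (invocation assumed, not part of this diff):

from django.core.management import call_command

# Runs one iteration of query_and_send_alerts_by_rate(Alert.REAL_TIME), then returns.
call_command("cl_send_rt_percolator_alerts", testing_mode=True)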
130 changes: 86 additions & 44 deletions cl/alerts/management/commands/cl_send_scheduled_alerts.py
@@ -1,9 +1,10 @@
import datetime
from collections import defaultdict
from typing import DefaultDict
from typing import Any, DefaultDict

import waffle
from asgiref.sync import async_to_sync
from django.conf import settings

from cl.alerts.models import (
SCHEDULED_ALERT_HIT_STATUS,
@@ -13,6 +14,8 @@
from cl.alerts.tasks import send_search_alert_emails
from cl.alerts.utils import InvalidDateError, override_alert_query
from cl.lib.command_utils import VerboseCommand, logger
from cl.search.models import SEARCH_TYPES
from cl.search.types import ESDictDocument
from cl.stats.utils import tally_stat

DAYS_TO_DELETE = 90
@@ -50,6 +53,30 @@ def get_cut_off_date(rate: str, d: datetime.date) -> datetime.date | None:
return cut_off_date


def merge_alert_child_documents(
documents: list[ESDictDocument],
) -> ESDictDocument:
"""Merge multiple child hits within the same main document.
:param documents: A list of document hits.
:return: The main document dictionary with its child documents merged.
"""

main_document = documents[0].copy()
child_docs: list[ESDictDocument] = []
for doc in documents:
if "child_docs" in doc:
child_docs.extend(doc["child_docs"])
if len(child_docs) >= settings.RECAP_CHILD_HITS_PER_RESULT:
# Nested child limit reached. Set child_remaining to True to show
# the "View Additional Hits" button.
main_document["child_remaining"] = True
break

if child_docs:
main_document["child_docs"] = child_docs
return main_document
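
A toy walk-through of the merge, pretending RECAP_CHILD_HITS_PER_RESULT is 2 (values hypothetical):

docs = [
    {"docket_id": 1, "child_docs": [{"id": 10}]},
    {"docket_id": 1, "child_docs": [{"id": 11}, {"id": 12}]},
]
merged = merge_alert_child_documents(docs)
# merged["child_docs"] == [{"id": 10}, {"id": 11}, {"id": 12}]
# merged["child_remaining"] is True: the second extend reached 3 >= 2 and broke the loop.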


def query_and_send_alerts_by_rate(rate: str) -> None:
"""Query and send alerts per user.
@@ -59,58 +86,77 @@ def query_and_send_alerts_by_rate(rate: str) -> None:

alerts_sent_count = 0
now_time = datetime.datetime.now()
alerts_to_update = []
scheduled_hits_rate = ScheduledAlertHit.objects.filter(
alert__rate=rate, hit_status=SCHEDULED_ALERT_HIT_STATUS.SCHEDULED
).select_related("user", "alert")

# Create a nested dictionary structure to hold the groups.
grouped_hits: DefaultDict[
int, DefaultDict[Alert, list[ScheduledAlertHit]]
] = defaultdict(lambda: defaultdict(list))

# Group scheduled hits by User and Alert.
for hit in scheduled_hits_rate:
user_id = hit.user.pk
alert = hit.alert
grouped_hits[user_id][alert].append(hit)

for user_id, alerts in grouped_hits.items():
# Get unique alert users with scheduled alert hits
user_ids = (
ScheduledAlertHit.objects.filter(
alert__rate=rate, hit_status=SCHEDULED_ALERT_HIT_STATUS.SCHEDULED
)
.values_list("user", flat=True)
.distinct()
)

for user_id in user_ids:
# Query ScheduledAlertHits for every user.
scheduled_hits = ScheduledAlertHit.objects.filter(
user_id=user_id,
alert__rate=rate,
hit_status=SCHEDULED_ALERT_HIT_STATUS.SCHEDULED,
).select_related("user", "alert")

# Group scheduled hits by Alert and the main_doc_id
grouped_hits: DefaultDict[
Alert, DefaultDict[int, list[dict[str, Any]]]
] = defaultdict(lambda: defaultdict(list))
alerts_to_update = set()
for hit in scheduled_hits:
alert = hit.alert
doc_content = json_date_parser(hit.document_content)
match hit.alert.alert_type:
case SEARCH_TYPES.RECAP:
main_doc_id = doc_content.get("docket_id")
case SEARCH_TYPES.ORAL_ARGUMENT:
main_doc_id = doc_content.get("id")
case _:
# Alert type not supported.
continue
grouped_hits[alert][main_doc_id].append(doc_content)
alerts_to_update.add(alert.pk)

# Merge child documents with the same main_doc_id if the document dict
# contains the child_docs key.
merged_hits: DefaultDict[Alert, list[dict[str, Any]]] = defaultdict(
list
)
for alert, document_groups in grouped_hits.items():
for documents in document_groups.values():
merged_hits[alert].append(
merge_alert_child_documents(documents)
)

hits = []
for alert, results in alerts.items():
for alert, documents in merged_hits.items():
search_type = alert.alert_type
documents = []
for result in results:
documents.append(json_date_parser(result.document_content))

alerts_to_update.append(alert.pk)

# Override order_by to show the latest items when clicking the
# "View Full Results" button.
cut_off_date = get_cut_off_date(rate, now_time.date())
qd = override_alert_query(alert, cut_off_date)
alert.query_run = qd.urlencode() # type: ignore
hits.append(
(
alert,
search_type,
documents,
len(documents),
)
)
hits.append((alert, search_type, documents, len(documents)))

if hits:
send_search_alert_emails.delay(
[(user_id, hits)], scheduled_alert=True
)
alerts_sent_count += 1

# Update Alert's date_last_hit in bulk.
Alert.objects.filter(id__in=alerts_to_update).update(
date_last_hit=now_time
)
# Update Alert's date_last_hit in bulk for this user's alerts
Alert.objects.filter(id__in=alerts_to_update).update(
date_last_hit=now_time
)

# Update Scheduled alert hits status to "SENT".
scheduled_hits_rate.update(hit_status=SCHEDULED_ALERT_HIT_STATUS.SENT)
# Update Scheduled alert hits status to "SENT" for this user
scheduled_hits.update(hit_status=SCHEDULED_ALERT_HIT_STATUS.SENT)

# Remove old Scheduled alert hits sent, daily.
if rate == Alert.DAILY:
@@ -124,16 +170,12 @@ def query_and_send_alerts_by_rate(rate: str) -> None:


def send_scheduled_alerts(rate: str) -> None:
if rate == Alert.DAILY:
query_and_send_alerts_by_rate(Alert.DAILY)
elif rate == Alert.WEEKLY:
query_and_send_alerts_by_rate(Alert.WEEKLY)
elif rate == Alert.MONTHLY:
if rate == Alert.MONTHLY:
if datetime.date.today().day > 28:
raise InvalidDateError(
"Monthly alerts cannot be run on the 29th, 30th or 31st."
)
query_and_send_alerts_by_rate(Alert.MONTHLY)
query_and_send_alerts_by_rate(rate)
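
Reconstructed from query_and_send_alerts_by_rate above, the per-user payload handed to the email task has this shape (variable names illustrative):

# One (user_id, hits) pair per user; each hit is an
# (alert, search_type, merged_documents, doc_count) tuple.
payload = [(user_id, [(alert, alert.alert_type, documents, len(documents))])]
send_search_alert_emails.delay(payload, scheduled_alert=True)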


def delete_old_scheduled_alerts() -> int: