diff --git a/cl/corpus_importer/management/commands/make_aws_manifest_files.py b/cl/corpus_importer/management/commands/make_aws_manifest_files.py
index 479708c0d0..ebc4690312 100644
--- a/cl/corpus_importer/management/commands/make_aws_manifest_files.py
+++ b/cl/corpus_importer/management/commands/make_aws_manifest_files.py
@@ -14,45 +14,53 @@
s3_client = boto3.client("s3")
-def get_total_number_of_records(type: str, use_replica: bool = False) -> int:
+def get_total_number_of_records(type: str, options: dict[str, Any]) -> int:
"""
Retrieves the total number of records for a specific data type.
Args:
type (str): The type of data to count. Must be one of the valid values
from the `SEARCH_TYPES` class.
- use_replica (bool, optional): Whether to use the replica database
- connection (default: False).
+ options (dict[str, Any]): A dictionary containing options for filtering
+ the results.
+ - 'use_replica' (bool, optional): Whether to use the replica database
+ connection (default: False).
+        - 'random_sample_percentage' (float, optional): The percentage of
+        records to include in a random sample.
+        - 'all_records' (bool, optional): Whether to skip the filter clause
+        and count every record in the table (default: False).
Returns:
int: The total number of records matching the specified data type.
"""
match type:
case SEARCH_TYPES.RECAP_DOCUMENT:
- query = """
- SELECT count(*) AS exact_count
- FROM search_recapdocument
+ base_query = (
+ "SELECT count(*) AS exact_count FROM search_recapdocument"
+ )
+ filter_clause = """
WHERE is_available=True AND page_count>0 AND ocr_status!=1
"""
case SEARCH_TYPES.OPINION:
- query = """
- SELECT count(*) AS exact_count
- FROM search_opinion
- WHERE extracted_by_ocr != true
- """
+ base_query = "SELECT count(*) AS exact_count FROM search_opinion"
+ filter_clause = "WHERE extracted_by_ocr != true"
case SEARCH_TYPES.ORAL_ARGUMENT:
- query = """
- SELECT count(*) AS exact_count
- FROM audio_audio
- WHERE
- local_path_mp3 != '' AND
+ base_query = "SELECT count(*) AS exact_count FROM audio_audio"
+ filter_clause = """WHERE local_path_mp3 != '' AND
download_url != 'https://www.cadc.uscourts.gov/recordings/recordings.nsf/' AND
position('Unavailable' in download_url) = 0 AND
duration > 30
"""
+ if options["random_sample_percentage"]:
+ percentage = options["random_sample_percentage"]
+ base_query = f"{base_query} TABLESAMPLE SYSTEM ({percentage})"
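+        # Note: TABLESAMPLE SYSTEM samples physical pages rather than
+        # individual rows, so the count returned for a sample is only
+        # approximately `percentage` percent of the table.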
+
+ query = (
+ f"{base_query}\n"
+ if options["all_records"]
+ else f"{base_query}\n {filter_clause}\n"
+ )
with connections[
- "replica" if use_replica else "default"
+ "replica" if options["use_replica"] else "default"
].cursor() as cursor:
cursor.execute(query, [])
result = cursor.fetchone()
@@ -60,7 +68,9 @@ def get_total_number_of_records(type: str, use_replica: bool = False) -> int:
return int(result[0])
-def get_custom_query(type: str, last_pk: str) -> tuple[str, list[Any]]:
+def get_custom_query(
+ type: str, last_pk: str, options: dict[str, Any]
+) -> tuple[str, list[Any]]:
"""
Generates a custom SQL query based on the provided type and optional last
pk.
@@ -69,6 +79,10 @@ def get_custom_query(type: str, last_pk: str) -> tuple[str, list[Any]]:
type (str): Type of data to retrieve.
last_pk (int, optional): Last primary key retrieved in a previous
query. Defaults to None.
+ options (dict[str, Any]): A dictionary containing options for filtering
+ the results.
+        - 'random_sample_percentage' (float, optional): The percentage of
+        records to include in a random sample.
+        - 'all_records' (bool, optional): Whether to omit the filter clause
+        and retrieve every record in the table (default: False).
Returns:
tuple[str, list[Any]]: A tuple containing the constructed SQL
@@ -76,50 +90,48 @@ def get_custom_query(type: str, last_pk: str) -> tuple[str, list[Any]]:
the query.
"""
params = []
-
+ random_sample = options["random_sample_percentage"]
match type:
case SEARCH_TYPES.RECAP_DOCUMENT:
base_query = "SELECT id from search_recapdocument"
filter_clause = (
"WHERE is_available=True AND page_count>0 AND ocr_status!=1"
- if not last_pk
- else (
- "WHERE id > %s AND is_available = True AND page_count > 0"
- " AND ocr_status != 1"
- )
)
case SEARCH_TYPES.OPINION:
base_query = "SELECT id from search_opinion"
- filter_clause = (
- "WHERE extracted_by_ocr != true"
- if not last_pk
- else "WHERE id > %s AND extracted_by_ocr != true"
- )
+ filter_clause = "WHERE extracted_by_ocr != true"
case SEARCH_TYPES.ORAL_ARGUMENT:
base_query = "SELECT id from audio_audio"
- no_argument_where_clause = """
+ filter_clause = """
WHERE local_path_mp3 != '' AND
download_url != 'https://www.cadc.uscourts.gov/recordings/recordings.nsf/' AND
position('Unavailable' in download_url) = 0 AND
duration > 30
"""
- where_clause_with_argument = """
- WHERE id > %s AND
- local_path_mp3 != '' AND
- download_url != 'https://www.cadc.uscourts.gov/recordings/recordings.nsf/' AND
- position('Unavailable' in download_url) = 0 AND
- duration > 30
- """
- filter_clause = (
- no_argument_where_clause
- if not last_pk
- else where_clause_with_argument
- )
- if last_pk:
+ if random_sample:
+ base_query = f"{base_query} TABLESAMPLE SYSTEM ({random_sample})"
+
+ if options["all_records"]:
+ filter_clause = ""
+
+ # Using a WHERE clause with `id > last_pk` and a LIMIT clause for batch
+ # retrieval is not suitable for random sampling. The following logic
+ # removes these clauses when retrieving a random sample to ensure all rows
+ # have an equal chance of being selected.
+ if last_pk and not random_sample:
+ filter_clause = (
+            "WHERE id > %s"
+ if not filter_clause
+ else f"{filter_clause} AND id > %s"
+ )
params.append(last_pk)
- query = f"{base_query}\n {filter_clause}\n ORDER BY id\n LIMIT %s"
+ query = (
+ f"{base_query}\n {filter_clause}"
+ if random_sample
+ else f"{base_query}\n {filter_clause}\n ORDER BY id\n LIMIT %s"
+ )
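+    # e.g. for opinions with a 0.5% sample the final query reads:
+    #   SELECT id from search_opinion TABLESAMPLE SYSTEM (0.5)
+    #    WHERE extracted_by_ocr != true
+    # while batched runs keep the ORDER BY id / LIMIT pagination instead.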
return query, params
@@ -170,6 +182,27 @@ def add_arguments(self, parser: CommandParser):
default=False,
help="Use this flag to run the queries in the replica db",
)
+ parser.add_argument(
+ "--file-name",
+ type=str,
+ default=None,
+ help="Custom name for the output files. If not provided, a default "
+ "name will be used.",
+ )
+ parser.add_argument(
+ "--random-sample-percentage",
+ type=float,
+ default=None,
+ help="Specifies the proportion of the table to be sampled (between "
+ "0.0 and 100.0). Use this flag to retrieve a random set of records.",
+ )
+ parser.add_argument(
+ "--all-records",
+ action="store_true",
+ default=False,
+ help="Use this flag to retrieve all records from the table without"
+ " applying any filters.",
+ )
def handle(self, *args, **options):
r = get_redis_interface("CACHE")
@@ -188,7 +221,7 @@ def handle(self, *args, **options):
)
if not total_number_of_records:
total_number_of_records = get_total_number_of_records(
- record_type, options["use_replica"]
+ record_type, options
)
r.hset(
f"{record_type}_import_status",
@@ -200,12 +233,17 @@ def handle(self, *args, **options):
r.hget(f"{record_type}_import_status", "next_iteration_counter")
or 0
)
+ file_name = (
+ options["file_name"]
+ if options["file_name"]
+ else f"{record_type}_filelist"
+ )
while True:
query, params = get_custom_query(
- options["record_type"],
- last_pk,
+ options["record_type"], last_pk, options
)
- params.append(options["query_batch_size"])
+ if not options["random_sample_percentage"]:
+ params.append(options["query_batch_size"])
with connections[
"replica" if options["use_replica"] else "default"
@@ -226,22 +264,37 @@ def handle(self, *args, **options):
extrasaction="ignore",
)
for row in batched(rows, options["lambda_record_size"]):
- query_dict = {
- "bucket": bucket_name,
- "file_name": (
+ if options["random_sample_percentage"]:
+ # Create an underscore-separated file name that lambda
+ # can split and use as part of batch processing.
+ ids = [str(r[0]) for r in row]
+ content = "_".join(ids)
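+                        # e.g. ids 12, 34 and 56 produce the name "12_34_56"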
+ else:
+ content = (
f"{row[0][0]}_{row[-1][0]}"
if len(row) > 1
else f"{row[0][0]}"
- ),
+ )
+ query_dict = {
+ "bucket": bucket_name,
+ "file_name": content,
}
writer.writerow(query_dict)
s3_client.put_object(
- Key=f"{record_type}_filelist_{counter}.csv",
+ Key=f"{file_name}_{counter}.csv",
Bucket=bucket_name,
Body=csvfile.getvalue().encode("utf-8"),
)
+ if options["random_sample_percentage"]:
+                # Random sampling is non-deterministic, so we don't persist
+                # progress for a future run: repeating the query would be
+                # unlikely to return the same records.
+ logger.info(f"Finished processing {record_count} records")
+ break
+
counter += 1
last_pk = rows[-1][0]
records_processed = int(
diff --git a/cl/custom_filters/templatetags/extras.py b/cl/custom_filters/templatetags/extras.py
index b67396e296..4b4c686819 100644
--- a/cl/custom_filters/templatetags/extras.py
+++ b/cl/custom_filters/templatetags/extras.py
@@ -14,7 +14,7 @@
from elasticsearch_dsl import AttrDict, AttrList
from cl.search.constants import ALERTS_HL_TAG, SEARCH_HL_TAG
-from cl.search.models import SEARCH_TYPES, Docket, DocketEntry
+from cl.search.models import SEARCH_TYPES, Court, Docket, DocketEntry
register = template.Library()
@@ -297,3 +297,33 @@ def alerts_supported(context: RequestContext, search_type: str) -> str:
and waffle.flag_is_active(request, "recap-alerts-active")
)
)
+
+
+@register.filter
+def group_courts(courts: list[Court], num_columns: int) -> list:
+    """Divide courts into roughly equal groups while keeping related courts together
+
+    :param courts: Courts to group.
+    :param num_columns: Number of groups desired.
+    :return: The courts grouped together.
+ """
+
+ column_len = len(courts) // num_columns
+ remainder = len(courts) % num_columns
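+    # The first `remainder` groups get one extra court, so group sizes
+    # differ by at most one before the COLR adjustment below.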
+
+ groups = []
+ start = 0
+ for index in range(num_columns):
+ # Calculate the end index for this chunk
+ end = start + column_len + (1 if index < remainder else 0)
+
+        # Extend the chunk until the next court of last resort (COLR) so
+        # that the following group starts with one
+ COLRs = [Court.TERRITORY_SUPREME, Court.STATE_SUPREME]
+ while end < len(courts) and courts[end].jurisdiction not in COLRs:
+ end += 1
+
+ # Create the column and add it to result
+ groups.append(courts[start:end])
+ start = end
+
+ return groups
diff --git a/cl/lib/command_utils.py b/cl/lib/command_utils.py
index d246288ac5..2c3797f9f5 100644
--- a/cl/lib/command_utils.py
+++ b/cl/lib/command_utils.py
@@ -17,6 +17,9 @@ def handle(self, *args, **options):
logger.setLevel(logging.INFO)
elif verbosity > 1:
logger.setLevel(logging.DEBUG)
+            # This will make juriscraper's logger accept all log calls
+            # (DEBUG and above).
+ juriscraper_logger = logging.getLogger("juriscraper")
+ juriscraper_logger.setLevel(logging.DEBUG)
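+            # This overrides the CRITICAL default configured for the
+            # "juriscraper" logger in cl/settings/project/logging.py.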
class CommandUtils:
diff --git a/cl/lib/search_utils.py b/cl/lib/search_utils.py
index 5a3fdb6afb..affb89318e 100644
--- a/cl/lib/search_utils.py
+++ b/cl/lib/search_utils.py
@@ -233,8 +233,8 @@ def merge_form_with_courts(
}
bap_bundle = []
b_bundle = []
- state_bundle: List = []
- state_bundles = []
+ states = []
+ territories = []
for court in courts:
if court.jurisdiction == Court.FEDERAL_APPELLATE:
court_tabs["federal"].append(court)
@@ -247,15 +247,9 @@ def merge_form_with_courts(
else:
b_bundle.append(court)
elif court.jurisdiction in Court.STATE_JURISDICTIONS:
- # State courts get bundled by supreme courts
- if court.jurisdiction == Court.STATE_SUPREME:
- # Whenever we hit a state supreme court, we append the
- # previous bundle and start a new one.
- if state_bundle:
- state_bundles.append(state_bundle)
- state_bundle = [court]
- else:
- state_bundle.append(court)
+ states.append(court)
+ elif court.jurisdiction in Court.TERRITORY_JURISDICTIONS:
+ territories.append(court)
elif court.jurisdiction in [
Court.FEDERAL_SPECIAL,
Court.COMMITTEE,
@@ -265,18 +259,11 @@ def merge_form_with_courts(
]:
court_tabs["special"].append(court)
- # append the final state bundle after the loop ends. Hack?
- state_bundles.append(state_bundle)
-
# Put the bankruptcy bundles in the courts dict
if bap_bundle:
court_tabs["bankruptcy_panel"] = [bap_bundle]
court_tabs["bankruptcy"] = [b_bundle]
-
- # Divide the state bundles into the correct partitions
- court_tabs["state"].append(state_bundles[:17])
- court_tabs["state"].append(state_bundles[17:34])
- court_tabs["state"].append(state_bundles[34:])
+ court_tabs["state"] = [states, territories]
return court_tabs, court_count_human, court_count
diff --git a/cl/opinion_page/templates/docket_tabs.html b/cl/opinion_page/templates/docket_tabs.html
index b284b2bef4..22d274cb68 100644
--- a/cl/opinion_page/templates/docket_tabs.html
+++ b/cl/opinion_page/templates/docket_tabs.html
@@ -126,8 +126,6 @@
{{ docket.court }}
{% endif %}
- {% with og_info=docket.originating_court_information %}
- {% with bankr_info=docket.bankruptcy_information %}
{% if docket.panel_str %}
@@ -282,6 +280,7 @@
{{ docket.court }}
{% endif %}
+ {% with bankr_info=docket.bankruptcy_information %}
{% if bankr_info %}
Bankruptcy Information
@@ -327,7 +326,10 @@ Bankruptcy Information
{% endif %}
{% endif %}
+ {% endwith %}{# No more bankr_info variable #}
+
+ {% with og_info=docket.originating_court_information %}
{% if og_info %}
Originating Court Information
@@ -423,7 +425,6 @@ Originating Court Information
{{ og_info.date_received_coa }}
{% endif %}
- {% endwith %}{# No more bankr_info variable #}
{% endwith %}{# No more og_info variable #}
diff --git a/cl/search/templates/includes/jurisdiction_picker_modal.html b/cl/search/templates/includes/jurisdiction_picker_modal.html
index 842337c0da..1cf3800812 100644
--- a/cl/search/templates/includes/jurisdiction_picker_modal.html
+++ b/cl/search/templates/includes/jurisdiction_picker_modal.html
@@ -1,4 +1,5 @@
{% load partition_util %}
+{% load extras %}
@@ -166,24 +167,31 @@
{% endif %}
{% if v == SEARCH_TYPES.OPINION or v == SEARCH_TYPES.PEOPLE %}
-
-
- {% for col_bundle in courts.state %}
-
- {% for court_bundle in col_bundle %}
- {% for court in court_bundle %}
- {% if court.jurisdiction == 'S' %}
- {% include "includes/court_checkbox.html" %}
- {% else %}
- {% include "includes/court_checkbox.html" with indent=True %}
- {% endif %}
- {% endfor %}
+
+ {% for group in courts.state %}
+ {% if forloop.counter == 1 %}
+
State Courts
+ {% elif forloop.counter == 2 %}
+
+
U.S. Territory Courts
+ {% endif %}
+
+ {% for col_bundle in group|group_courts:3 %}
+
+ {% for court in col_bundle %}
+ {% if court.jurisdiction == 'S' %}
+ {% include "includes/court_checkbox.html" %}
+ {% else %}
+ {% include "includes/court_checkbox.html" with indent=True %}
+ {% endif %}
+ {% endfor %}
+
{% endfor %}
- {% endfor %}
+ {% endfor %}
-
{% endif %}
+
{% if v != SEARCH_TYPES.ORAL_ARGUMENT %}
{# Regroup into closed/open courts #}
diff --git a/cl/settings/project/logging.py b/cl/settings/project/logging.py
index 226cecbee4..5c617ea8e0 100644
--- a/cl/settings/project/logging.py
+++ b/cl/settings/project/logging.py
@@ -78,6 +78,15 @@ def skip_unreadable_post(record):
},
# This is the one that's used practically everywhere in the code.
"cl": {"handlers": ["console"], "level": "INFO", "propagate": True},
+        # CRITICAL is the highest log level, so by default this logger
+        # rejects juriscraper's debug, info, warning and error calls.
+        # VerboseCommand raises the level to DEBUG when it is run with a
+        # verbosity greater than 1.
+ "juriscraper": {
+ "handlers": ["console"],
+ "propagate": True,
+ "level": "CRITICAL",
+ },
},
}
diff --git a/cl/users/templates/profile/deleted.html b/cl/users/templates/profile/deleted.html
index c82d84fbf8..f2db782571 100644
--- a/cl/users/templates/profile/deleted.html
+++ b/cl/users/templates/profile/deleted.html
@@ -1,23 +1,20 @@
{% extends "base.html" %}
-{% block title %}Goodbye – CourtListener.com{% endblock %}
+{% block title %}Account Deleted – CourtListener.com{% endblock %}
{% block sidebar %}{% endblock %}
{% block content %}
-
-
-
We Are Sorry to See You Go
-
Thank you for your time with our site, and goodbye for now. While we
- are concerned with every user that we lose, we do hope you have found
- something else to suit your needs.
-
-
If you haven't had a chance to tell us about your experience, please
- do so at your earliest convenience via our contact
- form. This will allow us to improve the system.
-
-
Goodbye one last time, and fare thee well,
-
~~The CourtListener Team~~
-
-
+
+
+
You Have Deleted Your Account
+
Thank you for your time with our site and goodbye for now.
+
If you are a regular contributor to Free Law Project, you may wish to cancel membership or monthly donations in our membership portal.
+
+
If you haven't had a chance to tell us about your experience, please do so at your earliest convenience via our contact form. This will allow us to improve the system.
+
+
Goodbye one last time and fare thee well,
+
~~The CourtListener Team~~
+
+
{% endblock %}
diff --git a/poetry.lock b/poetry.lock
index f7f6c67e40..a769a59f6a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2234,13 +2234,13 @@ setuptools = "*"
[[package]]
name = "juriscraper"
-version = "2.6.10"
+version = "2.6.12"
description = "An API to scrape American court websites for metadata."
optional = false
python-versions = "*"
files = [
- {file = "juriscraper-2.6.10-py27-none-any.whl", hash = "sha256:10edd6d6d524cb4d01ba29d22d85bfe99c5d5dd0eedd13c9788583c4bbcad389"},
- {file = "juriscraper-2.6.10.tar.gz", hash = "sha256:b374359b21f817af5ad916d0653dd87420dc2b4a588df1b51a42083f3c0fff03"},
+ {file = "juriscraper-2.6.12-py27-none-any.whl", hash = "sha256:7a9b28d149e476eec654b0ea8ad4f04375a7597cc8d32bc169dff31bb8a5e63c"},
+ {file = "juriscraper-2.6.12.tar.gz", hash = "sha256:b70dd3c441e91c80414fc0fce32f78c1c0e6d480996784f94ea94aa4ce033644"},
]
[package.dependencies]
diff --git a/scripts/make_bulk_data.sh b/scripts/make_bulk_data.sh
index 3a4a28f58e..42469203e7 100755
--- a/scripts/make_bulk_data.sh
+++ b/scripts/make_bulk_data.sh
@@ -93,6 +93,12 @@ opinioncluster_fields='(
)'
opinioncluster_csv_filename="opinion-clusters-$(date -I).csv"
+search_opinion_joined_by_fields='(
+ id, opinion_id, person_id
+)'
+search_opinion_joined_by_csv_filename="search_opinion_joined_by-$(date -I).csv"
+
+
# search_opinion
opinion_fields='(
id, date_created, date_modified, author_str, per_curiam, joined_by_str,
@@ -184,12 +190,28 @@ politicalaffiliation_fields='(
)'
politicalaffiliation_csv_filename="people-db-political-affiliations-$(date -I).csv"
+# people_db_race
+people_db_race_fields='(id, race)'
+people_db_race_csv_filename="people_db_race-$(date -I).csv"
+
# people_db_person_race
people_db_person_race_fields='(
id, person_id, race_id
)'
people_db_person_race_csv_filename="people-db-races-$(date -I).csv"
+
+search_opinioncluster_panel_fields='(
+ id, opinioncluster_id, person_id
+)'
+search_opinioncluster_panel_csv_filename="search_opinioncluster_panel-$(date -I).csv"
+
+
+search_opinioncluster_non_participating_judges_fields='(
+ id, opinioncluster_id, person_id
+)'
+search_opinioncluster_non_participating_judges_csv_filename="search_opinioncluster_non_participating_judges-$(date -I).csv"
+
# disclosures_financialdisclosure
financialdisclosure_fields='(
id, date_created, date_modified, year, download_filepath, filepath, thumbnail,
@@ -259,37 +281,45 @@ disclosures_debt_fields='(
disclosures_debt_csv_filename="financial-disclosures-debts-$(date -I).csv"
# If you add or remove a table, you need to update this number
-NUM_TABLES=28
+NUM_TABLES=32
# Every new table added to bulk script should be added as an associative array
-declare -a t_1=("search_court" "$court_fields" "$court_csv_filename")
-declare -a t_2=("search_courthouse" "$courthouse_fields" "$courthouse_csv_filename")
-declare -a t_3=("search_court_appeals_to" "$court_appeals_to_fields" "$court_appeals_to_csv_filename")
-declare -a t_4=("search_docket" "$docket_fields" "$dockets_csv_filename")
-declare -a t_5=("search_originatingcourtinformation" "$originatingcourtinformation_fields" "$originatingcourtinformation_csv_filename")
+# This ordering is important. Tables must be loaded before any tables that
+# reference them through foreign keys.
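+# e.g. search_docket references search_court and search_opinion references
+# people_db_person, so courts and people must be loaded before dockets and
+# opinions.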
+declare -a t_1=("people_db_person" "$people_db_person_fields" "$people_db_person_csv_filename")
+declare -a t_2=("people_db_race" "$people_db_race_fields" "$people_db_race_csv_filename")
+declare -a t_3=("people_db_school" "$people_db_school_fields" "$people_db_school_csv_filename")
+declare -a t_4=("search_court" "$court_fields" "$court_csv_filename")
+declare -a t_5=("people_db_position" "$people_db_position_fields" "$people_db_position_csv_filename")
declare -a t_6=("recap_fjcintegrateddatabase" "$fjcintegrateddatabase_fields" "$fjcintegrateddatabase_csv_filename")
-declare -a t_7=("search_opinioncluster" "$opinioncluster_fields" "$opinioncluster_csv_filename")
-declare -a t_8=("search_opinion" "$opinion_fields" "$opinions_csv_filename")
-declare -a t_9=("search_opinionscited" "$opinionscited_fields" "$opinionscited_csv_filename")
-declare -a t_10=("search_citation" "$citation_fields" "$citations_csv_filename")
-declare -a t_11=("search_parenthetical" "$parentheticals_fields" "$parentheticals_csv_filename")
-declare -a t_12=("audio_audio" "$oralarguments_fields" "$oralarguments_csv_filename")
-declare -a t_13=("people_db_person" "$people_db_person_fields" "$people_db_person_csv_filename")
-declare -a t_14=("people_db_school" "$people_db_school_fields" "$people_db_school_csv_filename")
-declare -a t_15=("people_db_position" "$people_db_position_fields" "$people_db_position_csv_filename")
-declare -a t_16=("people_db_retentionevent" "$people_db_retentionevent_fields" "$people_db_retentionevent_csv_filename")
-declare -a t_17=("people_db_education" "$people_db_education_fields" "$people_db_education_csv_filename")
-declare -a t_18=("people_db_politicalaffiliation" "$politicalaffiliation_fields" "$politicalaffiliation_csv_filename")
-declare -a t_19=("people_db_person_race" "$people_db_person_race_fields" "$people_db_person_race_csv_filename")
-declare -a t_20=("disclosures_financialdisclosure" "$financialdisclosure_fields" "$financialdisclosure_csv_filename")
-declare -a t_21=("disclosures_investment" "$investment_fields" "$investment_csv_filename")
-declare -a t_22=("disclosures_position" "$disclosures_position_fields" "$disclosures_position_csv_filename")
-declare -a t_23=("disclosures_agreement" "$disclosures_agreement_fields" "$disclosures_agreement_csv_filename")
-declare -a t_24=("disclosures_noninvestmentincome" "$noninvestmentincome_fields" "$noninvestmentincome_csv_filename")
-declare -a t_25=("disclosures_spouseincome" "$spouseincome_fields" "$spouseincome_csv_filename")
-declare -a t_26=("disclosures_reimbursement" "$disclosures_reimbursement_fields" "$disclosures_reimbursement_csv_filename")
-declare -a t_27=("disclosures_gift" "$disclosures_gift_fields" "$disclosures_gift_csv_filename")
-declare -a t_28=("disclosures_debt" "$disclosures_debt_fields" "$disclosures_debt_csv_filename")
+declare -a t_7=("search_originatingcourtinformation" "$originatingcourtinformation_fields" "$originatingcourtinformation_csv_filename")
+
+declare -a t_8=("search_docket" "$docket_fields" "$dockets_csv_filename")
+declare -a t_9=("search_opinioncluster" "$opinioncluster_fields" "$opinioncluster_csv_filename")
+declare -a t_10=("search_opinioncluster_panel" "$search_opinioncluster_panel_fields" "$search_opinioncluster_panel_csv_filename")
+declare -a t_11=("search_opinioncluster_non_participating_judges" "$search_opinioncluster_non_participating_judges_fields" "$search_opinioncluster_non_participating_judges_csv_filename")
+
+declare -a t_12=("search_opinion" "$opinion_fields" "$opinions_csv_filename")
+declare -a t_13=("search_opinion_joined_by" "$search_opinion_joined_by_fields" "$search_opinion_joined_by_csv_filename")
+declare -a t_14=("search_courthouse" "$courthouse_fields" "$courthouse_csv_filename")
+declare -a t_15=("search_court_appeals_to" "$court_appeals_to_fields" "$court_appeals_to_csv_filename")
+declare -a t_16=("search_opinionscited" "$opinionscited_fields" "$opinionscited_csv_filename")
+declare -a t_17=("search_citation" "$citation_fields" "$citations_csv_filename")
+declare -a t_18=("search_parenthetical" "$parentheticals_fields" "$parentheticals_csv_filename")
+declare -a t_19=("audio_audio" "$oralarguments_fields" "$oralarguments_csv_filename")
+declare -a t_20=("people_db_retentionevent" "$people_db_retentionevent_fields" "$people_db_retentionevent_csv_filename")
+declare -a t_21=("people_db_education" "$people_db_education_fields" "$people_db_education_csv_filename")
+declare -a t_22=("people_db_politicalaffiliation" "$politicalaffiliation_fields" "$politicalaffiliation_csv_filename")
+declare -a t_23=("people_db_person_race" "$people_db_person_race_fields" "$people_db_person_race_csv_filename")
+
+declare -a t_24=("disclosures_financialdisclosure" "$financialdisclosure_fields" "$financialdisclosure_csv_filename")
+declare -a t_25=("disclosures_investment" "$investment_fields" "$investment_csv_filename")
+declare -a t_26=("disclosures_position" "$disclosures_position_fields" "$disclosures_position_csv_filename")
+declare -a t_27=("disclosures_agreement" "$disclosures_agreement_fields" "$disclosures_agreement_csv_filename")
+declare -a t_28=("disclosures_noninvestmentincome" "$noninvestmentincome_fields" "$noninvestmentincome_csv_filename")
+declare -a t_29=("disclosures_spouseincome" "$spouseincome_fields" "$spouseincome_csv_filename")
+declare -a t_30=("disclosures_reimbursement" "$disclosures_reimbursement_fields" "$disclosures_reimbursement_csv_filename")
+declare -a t_31=("disclosures_gift" "$disclosures_gift_fields" "$disclosures_gift_csv_filename")
+declare -a t_32=("disclosures_debt" "$disclosures_debt_fields" "$disclosures_debt_csv_filename")
# Create a new array with the data of each associative array
declare -a listOfLists
@@ -305,7 +335,7 @@ echo "Streaming ${lst[0]} to S3"
psql \
--command \
"set statement_timeout to 0;
- COPY ${lst[0]} ${lst[1]} TO STDOUT WITH (FORMAT csv, ENCODING utf8, HEADER, FORCE_QUOTE *)" \
+     COPY ${lst[0]} ${lst[1]} TO STDOUT WITH (FORMAT csv, ENCODING utf8, HEADER, QUOTE '\`', FORCE_QUOTE *)" \
--quiet \
--host "$DB_HOST" \
--username "$DB_USER" \
@@ -324,6 +354,8 @@ pg_dump \
--table 'search_*' \
--table 'people_db_*' \
--table 'audio_*' \
+ --table 'recap_*' \
+ --table 'disclosures_*' \
--no-privileges \
--no-publications \
--no-subscriptions courtlistener | \
@@ -384,7 +416,7 @@ declare -a lst="$group"
cat >> "$OUT" <<- EOF
echo "Loading ${lst[2]} to database"
psql --command \
-"COPY public.${lst[0]} ${lst[1]} FROM '\$BULK_DIR/${lst[2]}' WITH (FORMAT csv, ENCODING utf8, HEADER)" \
+"COPY public.${lst[0]} ${lst[1]} FROM '\$BULK_DIR/${lst[2]}' WITH (FORMAT csv, ENCODING utf8, QUOTE '`', HEADER)" \
--host "\$BULK_DB_HOST" \
--username "\$BULK_DB_USER" \
--dbname "\$BULK_DB_NAME"