From 564ab1e233bb87261ec8db22c2a4206eff0feee2 Mon Sep 17 00:00:00 2001 From: Eric Arellano <14852634+Eric-Arellano@users.noreply.github.com> Date: Sat, 29 Jun 2024 14:35:47 -0400 Subject: [PATCH] Enrich data with Mailchimp (#14) --- .github/workflows/cron.yaml | 2 ++ default.lock | 22 ++++++++++++++++++++++ pyproject.toml | 1 + requirements.txt | 3 ++- src/mailchimp_coordinates.py | 34 ++++++++++++++++++++++++++++++++++ src/mailchimp_entry.py | 32 -------------------------------- src/main.py | 11 ++++++----- src/salesforce_api.py | 12 ++++++------ src/salesforce_entry.py | 22 +++++++++++----------- src/salesforce_entry_test.py | 12 ++++++------ 10 files changed, 90 insertions(+), 61 deletions(-) create mode 100644 src/mailchimp_coordinates.py delete mode 100644 src/mailchimp_entry.py diff --git a/.github/workflows/cron.yaml b/.github/workflows/cron.yaml index 83e4d53..ca175ff 100644 --- a/.github/workflows/cron.yaml +++ b/.github/workflows/cron.yaml @@ -24,3 +24,5 @@ jobs: SALESFORCE_PASSWORD: ${{ secrets.SALESFORCE_PASSWORD }} SALESFORCE_TOKEN: ${{ secrets.SALESFORCE_TOKEN }} ENCRYPTION_KEY: ${{ secrets.ENCRYPTION_KEY }} + MAILCHIMP_KEY: ${{ secrets.MAILCHIMP_KEY }} + MAILCHIMP_LIST_ID: ${{ secrets.MAILCHIMP_LIST_ID }} diff --git a/default.lock b/default.lock index 665e075..6f5db46 100644 --- a/default.lock +++ b/default.lock @@ -11,6 +11,7 @@ // "generated_with_requirements": [ // "cryptography", // "geopy", +// "mailchimp3", // "pydantic", // "pytest", // "python-Levenshtein", @@ -1483,6 +1484,26 @@ "requires_python": ">=3.6", "version": "5.2.2" }, + { + "artifacts": [ + { + "algorithm": "sha256", + "hash": "6eb335fcd915b3233285a0bdf6317fdfc412b4a638c102ae3a7a52019d3f0970", + "url": "https://files.pythonhosted.org/packages/5c/a9/772609122afc83ac149bb03ec92e2e91659b3d274e8dbfd0cd02ec5e78f7/mailchimp3-3.0.21-py2.py3-none-any.whl" + }, + { + "algorithm": "sha256", + "hash": "5e2930ece6144abb659d45e692e92135ab05c9027d3f5e807c0f66cfb374b9ad", + "url": "https://files.pythonhosted.org/packages/08/71/cdef8e888784c5da1186ce82e8d3414dc3d8e47a9fe7b62b483e217591eb/mailchimp3-3.0.21.tar.gz" + } + ], + "project_name": "mailchimp3", + "requires_dists": [ + "requests>=2.7.0" + ], + "requires_python": null, + "version": "3.0.21" + }, { "artifacts": [ { @@ -2604,6 +2625,7 @@ "requirements": [ "cryptography", "geopy", + "mailchimp3", "pydantic", "pytest", "python-Levenshtein", diff --git a/pyproject.toml b/pyproject.toml index d8ebf53..7f94501 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,7 @@ [[tool.mypy.overrides]] module = [ "geopy", + "mailchimp3", "uszipcode" ] ignore_missing_imports = true diff --git a/requirements.txt b/requirements.txt index 7184db5..923c9de 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,8 @@ cryptography geopy -uszipcode +mailchimp3 python-Levenshtein pydantic pytest simple-salesforce +uszipcode diff --git a/src/mailchimp_coordinates.py b/src/mailchimp_coordinates.py new file mode 100644 index 0000000..b908fa5 --- /dev/null +++ b/src/mailchimp_coordinates.py @@ -0,0 +1,34 @@ +import logging +import os +from typing import Any, NamedTuple + +from mailchimp3 import MailChimp + +logging.getLogger("mailchimp3.client").setLevel(logging.CRITICAL) + + +class Coordinates(NamedTuple): + latitude: float + longitude: float + + @classmethod + def from_mailchimp(cls, entry: dict[str, Any]) -> "Coordinates | None": + lat: float = entry["location"]["latitude"] + long: float = entry["location"]["longitude"] + return cls(lat, long) if lat and long else None + + +def get_coordinates_by_email() -> dict[str, Coordinates | None]: + key = os.environ.pop("MAILCHIMP_KEY") + list_id = os.environ.pop("MAILCHIMP_LIST_ID") + client = MailChimp(mc_api=key) + result = client.lists.members.all( + list_id=list_id, + fields="members.email_address,members.location.latitude,members.location.longitude", + get_all=True, + )["members"] + return { + entry["email_address"]: coords + for entry in result + if (coords := Coordinates.from_mailchimp(entry)) is not None + } diff --git a/src/mailchimp_entry.py b/src/mailchimp_entry.py deleted file mode 100644 index 62218ea..0000000 --- a/src/mailchimp_entry.py +++ /dev/null @@ -1,32 +0,0 @@ -from pydantic import BaseModel, Field, field_validator - - -class MailchimpEntry(BaseModel): - email: str = Field(..., alias="Email Address") - latitude: str = Field(..., alias="LATITUDE") - longitude: str = Field(..., alias="LONGITUDE") - - @field_validator("latitude") - @classmethod - def normalize_latitude(cls, v: str) -> str: - return v.lstrip("'") - - @field_validator("longitude") - @classmethod - def normalize_longitude(cls, v: str) -> str: - return v.lstrip("'") - - @classmethod - def mock( - cls, - *, - latitude: str | None = None, - longitude: str | None = None, - ) -> "MailchimpEntry": - return cls( - **{ - "Email Address": "", - "LATITUDE": latitude or "", - "LONGITUDE": longitude or "", - } - ) diff --git a/src/main.py b/src/main.py index d67d2a5..6f8103b 100644 --- a/src/main.py +++ b/src/main.py @@ -6,7 +6,7 @@ import metro_csvs import salesforce_api -from mailchimp_entry import MailchimpEntry +from mailchimp_coordinates import get_coordinates_by_email logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s") @@ -25,9 +25,10 @@ def main() -> None: salesforce_client = salesforce_api.init_client() entries = salesforce_api.load_data(salesforce_client) + logger.info(f"Loaded {len(entries)} Salesforce records") - # TODO: read in Mailchimp data - mailchimp_by_email: dict[str, MailchimpEntry] = {} + coordinates_by_email = get_coordinates_by_email() + logger.info(f"Loaded {len(coordinates_by_email)} coordinates from Mailchimp") us_zip_to_metro = metro_csvs.read_us_zip_to_metro() us_city_and_state_to_metro = metro_csvs.read_us_city_and_state_to_metro() @@ -40,8 +41,8 @@ def main() -> None: # The order of operations matters. if entry.email: - entry.populate_via_latitude_longitude( - mailchimp_by_email.get(entry.email), geocoder + entry.populate_via_coordinates( + coordinates_by_email.get(entry.email), geocoder ) entry.normalize() entry.populate_via_zipcode(zipcode_search_engine) diff --git a/src/salesforce_api.py b/src/salesforce_api.py index bae1e2c..f538a01 100644 --- a/src/salesforce_api.py +++ b/src/salesforce_api.py @@ -6,13 +6,13 @@ def init_client() -> Salesforce: - USERNAME = os.environ.pop("SALESFORCE_USERNAME") - PASSWORD = os.environ.pop("SALESFORCE_PASSWORD") - TOKEN = os.environ.pop("SALESFORCE_TOKEN") + username = os.environ.pop("SALESFORCE_USERNAME") + password = os.environ.pop("SALESFORCE_PASSWORD") + token = os.environ.pop("SALESFORCE_TOKEN") return Salesforce( - username=USERNAME, - password=PASSWORD, - security_token=TOKEN, + username=username, + password=password, + security_token=token, client_id="salesforce-data-enrichment", ) diff --git a/src/salesforce_entry.py b/src/salesforce_entry.py index 949d3b8..70795b4 100644 --- a/src/salesforce_entry.py +++ b/src/salesforce_entry.py @@ -2,7 +2,7 @@ from uszipcode import SearchEngine from pydantic import BaseModel, Field -from mailchimp_entry import MailchimpEntry +from mailchimp_coordinates import Coordinates from country_codes import COUNTRY_CODES_TWO_LETTER_TO_THREE, COUNTRY_NAMES_TO_THREE from state_codes import US_STATES_TO_CODES @@ -12,8 +12,8 @@ class SalesforceEntry(BaseModel): email: str | None = Field(..., alias="Email", frozen=True) city: str | None = Field(..., alias="MailingCity") country: str | None = Field(..., alias="MailingCountry") - latitude: str | None = Field(..., alias="MailingLatitude") - longitude: str | None = Field(..., alias="MailingLongitude") + latitude: float | None = Field(..., alias="MailingLatitude") + longitude: float | None = Field(..., alias="MailingLongitude") zipcode: str | None = Field(..., alias="MailingPostalCode") state: str | None = Field(..., alias="MailingState") street: str | None = Field(..., alias="MailingStreet") @@ -25,8 +25,8 @@ def mock( *, city: str | None = None, country: str | None = None, - latitude: str | None = None, - longitude: str | None = None, + latitude: float | None = None, + longitude: float | None = None, zipcode: str | None = None, state: str | None = None, street: str | None = None, @@ -83,24 +83,24 @@ def normalize(self) -> None: raise AssertionError(f"Unexpected zipcode for {self}") self.zipcode = self.zipcode[:5] - def populate_via_latitude_longitude( - self, mailchimp: MailchimpEntry | None, geocoder: Nominatim + def populate_via_coordinates( + self, coordinates: Coordinates | None, geocoder: Nominatim ) -> None: - if mailchimp is None or not (mailchimp.latitude and mailchimp.longitude): + if coordinates is None: return metro_area_can_be_computed = self.zipcode or (self.city and self.country) if metro_area_can_be_computed: return - addr = geocoder.reverse(f"{mailchimp.latitude}, {mailchimp.longitude}").raw[ + addr = geocoder.reverse(f"{coordinates.latitude}, {coordinates.longitude}").raw[ "address" ] if "postcode" not in addr: return - self.latitude = mailchimp.latitude - self.longitude = mailchimp.longitude + self.latitude = coordinates.latitude + self.longitude = coordinates.longitude self.zipcode = addr["postcode"] # Also overwrite any existing values so that we don't mix the prior address diff --git a/src/salesforce_entry_test.py b/src/salesforce_entry_test.py index c8201ab..97b910e 100644 --- a/src/salesforce_entry_test.py +++ b/src/salesforce_entry_test.py @@ -3,7 +3,7 @@ import pytest from uszipcode import SearchEngine -from mailchimp_entry import MailchimpEntry +from mailchimp_coordinates import Coordinates from salesforce_entry import SalesforceEntry @@ -95,16 +95,16 @@ def test_populate_via_zipcode( assert entry.city == expected_city -def test_populate_via_lat_long(geocoder_mock) -> None: - mailchimp = MailchimpEntry.mock(latitude="1.1", longitude="'4.2") +def test_populate_via_coordinates(geocoder_mock) -> None: + coordinates = Coordinates(latitude=1.1, longitude=4.2) entry = SalesforceEntry.mock() - entry.populate_via_latitude_longitude(mailchimp, geocoder_mock) + entry.populate_via_coordinates(coordinates, geocoder_mock) assert entry.city == "New York" assert entry.state == "NY" assert entry.country == "USA" assert entry.zipcode == "11370" - assert entry.latitude == "1.1" - assert entry.longitude == "4.2" + assert entry.latitude == 1.1 + assert entry.longitude == 4.2 @pytest.mark.parametrize(