Skip to content

Commit

Permalink
Enrich data with Mailchimp (#14)
Browse files Browse the repository at this point in the history
  • Loading branch information
Eric-Arellano authored Jun 29, 2024
1 parent f6913bb commit 564ab1e
Show file tree
Hide file tree
Showing 10 changed files with 90 additions and 61 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/cron.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ jobs:
SALESFORCE_PASSWORD: ${{ secrets.SALESFORCE_PASSWORD }}
SALESFORCE_TOKEN: ${{ secrets.SALESFORCE_TOKEN }}
ENCRYPTION_KEY: ${{ secrets.ENCRYPTION_KEY }}
MAILCHIMP_KEY: ${{ secrets.MAILCHIMP_KEY }}
MAILCHIMP_LIST_ID: ${{ secrets.MAILCHIMP_LIST_ID }}
22 changes: 22 additions & 0 deletions default.lock
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
// "generated_with_requirements": [
// "cryptography",
// "geopy",
// "mailchimp3",
// "pydantic",
// "pytest",
// "python-Levenshtein",
Expand Down Expand Up @@ -1483,6 +1484,26 @@
"requires_python": ">=3.6",
"version": "5.2.2"
},
{
"artifacts": [
{
"algorithm": "sha256",
"hash": "6eb335fcd915b3233285a0bdf6317fdfc412b4a638c102ae3a7a52019d3f0970",
"url": "https://files.pythonhosted.org/packages/5c/a9/772609122afc83ac149bb03ec92e2e91659b3d274e8dbfd0cd02ec5e78f7/mailchimp3-3.0.21-py2.py3-none-any.whl"
},
{
"algorithm": "sha256",
"hash": "5e2930ece6144abb659d45e692e92135ab05c9027d3f5e807c0f66cfb374b9ad",
"url": "https://files.pythonhosted.org/packages/08/71/cdef8e888784c5da1186ce82e8d3414dc3d8e47a9fe7b62b483e217591eb/mailchimp3-3.0.21.tar.gz"
}
],
"project_name": "mailchimp3",
"requires_dists": [
"requests>=2.7.0"
],
"requires_python": null,
"version": "3.0.21"
},
{
"artifacts": [
{
Expand Down Expand Up @@ -2604,6 +2625,7 @@
"requirements": [
"cryptography",
"geopy",
"mailchimp3",
"pydantic",
"pytest",
"python-Levenshtein",
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
[[tool.mypy.overrides]]
module = [
"geopy",
"mailchimp3",
"uszipcode"
]
ignore_missing_imports = true
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
cryptography
geopy
uszipcode
mailchimp3
python-Levenshtein
pydantic
pytest
simple-salesforce
uszipcode
34 changes: 34 additions & 0 deletions src/mailchimp_coordinates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import logging
import os
from typing import Any, NamedTuple

from mailchimp3 import MailChimp

# Raise the "mailchimp3.client" logger's threshold so that only CRITICAL
# records from it are emitted.
# NOTE(review): presumably this keeps the client's per-request logging (which
# may include member data) out of the job output — confirm the intent.
logging.getLogger("mailchimp3.client").setLevel(logging.CRITICAL)


class Coordinates(NamedTuple):
    """A latitude/longitude pair read from a Mailchimp list member."""

    latitude: float
    longitude: float

    @classmethod
    def from_mailchimp(cls, entry: dict[str, Any]) -> "Coordinates | None":
        """Build coordinates from a Mailchimp member entry, if it has any.

        Args:
            entry: A member record. The position is read from
                ``entry["location"]["latitude"]`` and
                ``entry["location"]["longitude"]``.

        Returns:
            The coordinates, or ``None`` when the entry has no location, when
            either component is missing/``None``, or when both components are
            falsy (the ``0, 0`` placeholder).
        """
        # Tolerate a missing or null "location" rather than raising, so one
        # incomplete member cannot abort loading the whole list.
        location = entry.get("location") or {}
        lat = location.get("latitude")
        long = location.get("longitude")
        if lat is None or long is None:
            return None

        # NOTE(review): Mailchimp appears to report members without a known
        # location as latitude 0 / longitude 0 — confirm against the API docs.
        # Only that pair is treated as "absent"; a single zero component is a
        # real position on the equator or the prime meridian and is kept.
        if not lat and not long:
            return None
        return cls(lat, long)


def get_coordinates_by_email() -> dict[str, Coordinates | None]:
    """Fetch Mailchimp list members and map each email to its coordinates.

    The API key and list id are popped from the ``MAILCHIMP_KEY`` and
    ``MAILCHIMP_LIST_ID`` environment variables, so both are removed from
    ``os.environ`` once read.

    Returns:
        A dict keyed by member email address. Members for which
        ``Coordinates.from_mailchimp`` returns ``None`` are left out.

    Raises:
        KeyError: If either environment variable is not set.
    """
    api_key = os.environ.pop("MAILCHIMP_KEY")
    audience_id = os.environ.pop("MAILCHIMP_LIST_ID")

    # Only the email and the two location components are requested.
    # NOTE(review): get_all=True presumably makes the client page through the
    # entire list — confirm against the mailchimp3 documentation.
    response = MailChimp(mc_api=api_key).lists.members.all(
        list_id=audience_id,
        fields="members.email_address,members.location.latitude,members.location.longitude",
        get_all=True,
    )

    coordinates_by_email: dict[str, Coordinates | None] = {}
    for member in response["members"]:
        member_coordinates = Coordinates.from_mailchimp(member)
        if member_coordinates is None:
            continue
        coordinates_by_email[member["email_address"]] = member_coordinates
    return coordinates_by_email
32 changes: 0 additions & 32 deletions src/mailchimp_entry.py

This file was deleted.

11 changes: 6 additions & 5 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import metro_csvs
import salesforce_api
from mailchimp_entry import MailchimpEntry
from mailchimp_coordinates import get_coordinates_by_email

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
Expand All @@ -25,9 +25,10 @@ def main() -> None:

salesforce_client = salesforce_api.init_client()
entries = salesforce_api.load_data(salesforce_client)
logger.info(f"Loaded {len(entries)} Salesforce records")

# TODO: read in Mailchimp data
mailchimp_by_email: dict[str, MailchimpEntry] = {}
coordinates_by_email = get_coordinates_by_email()
logger.info(f"Loaded {len(coordinates_by_email)} coordinates from Mailchimp")

us_zip_to_metro = metro_csvs.read_us_zip_to_metro()
us_city_and_state_to_metro = metro_csvs.read_us_city_and_state_to_metro()
Expand All @@ -40,8 +41,8 @@ def main() -> None:

# The order of operations matters.
if entry.email:
entry.populate_via_latitude_longitude(
mailchimp_by_email.get(entry.email), geocoder
entry.populate_via_coordinates(
coordinates_by_email.get(entry.email), geocoder
)
entry.normalize()
entry.populate_via_zipcode(zipcode_search_engine)
Expand Down
12 changes: 6 additions & 6 deletions src/salesforce_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@


def init_client() -> Salesforce:
USERNAME = os.environ.pop("SALESFORCE_USERNAME")
PASSWORD = os.environ.pop("SALESFORCE_PASSWORD")
TOKEN = os.environ.pop("SALESFORCE_TOKEN")
username = os.environ.pop("SALESFORCE_USERNAME")
password = os.environ.pop("SALESFORCE_PASSWORD")
token = os.environ.pop("SALESFORCE_TOKEN")
return Salesforce(
username=USERNAME,
password=PASSWORD,
security_token=TOKEN,
username=username,
password=password,
security_token=token,
client_id="salesforce-data-enrichment",
)

Expand Down
22 changes: 11 additions & 11 deletions src/salesforce_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from uszipcode import SearchEngine
from pydantic import BaseModel, Field

from mailchimp_entry import MailchimpEntry
from mailchimp_coordinates import Coordinates
from country_codes import COUNTRY_CODES_TWO_LETTER_TO_THREE, COUNTRY_NAMES_TO_THREE
from state_codes import US_STATES_TO_CODES

Expand All @@ -12,8 +12,8 @@ class SalesforceEntry(BaseModel):
email: str | None = Field(..., alias="Email", frozen=True)
city: str | None = Field(..., alias="MailingCity")
country: str | None = Field(..., alias="MailingCountry")
latitude: str | None = Field(..., alias="MailingLatitude")
longitude: str | None = Field(..., alias="MailingLongitude")
latitude: float | None = Field(..., alias="MailingLatitude")
longitude: float | None = Field(..., alias="MailingLongitude")
zipcode: str | None = Field(..., alias="MailingPostalCode")
state: str | None = Field(..., alias="MailingState")
street: str | None = Field(..., alias="MailingStreet")
Expand All @@ -25,8 +25,8 @@ def mock(
*,
city: str | None = None,
country: str | None = None,
latitude: str | None = None,
longitude: str | None = None,
latitude: float | None = None,
longitude: float | None = None,
zipcode: str | None = None,
state: str | None = None,
street: str | None = None,
Expand Down Expand Up @@ -83,24 +83,24 @@ def normalize(self) -> None:
raise AssertionError(f"Unexpected zipcode for {self}")
self.zipcode = self.zipcode[:5]

def populate_via_latitude_longitude(
self, mailchimp: MailchimpEntry | None, geocoder: Nominatim
def populate_via_coordinates(
self, coordinates: Coordinates | None, geocoder: Nominatim
) -> None:
if mailchimp is None or not (mailchimp.latitude and mailchimp.longitude):
if coordinates is None:
return

metro_area_can_be_computed = self.zipcode or (self.city and self.country)
if metro_area_can_be_computed:
return

addr = geocoder.reverse(f"{mailchimp.latitude}, {mailchimp.longitude}").raw[
addr = geocoder.reverse(f"{coordinates.latitude}, {coordinates.longitude}").raw[
"address"
]
if "postcode" not in addr:
return

self.latitude = mailchimp.latitude
self.longitude = mailchimp.longitude
self.latitude = coordinates.latitude
self.longitude = coordinates.longitude
self.zipcode = addr["postcode"]

# Also overwrite any existing values so that we don't mix the prior address
Expand Down
12 changes: 6 additions & 6 deletions src/salesforce_entry_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest
from uszipcode import SearchEngine

from mailchimp_entry import MailchimpEntry
from mailchimp_coordinates import Coordinates
from salesforce_entry import SalesforceEntry


Expand Down Expand Up @@ -95,16 +95,16 @@ def test_populate_via_zipcode(
assert entry.city == expected_city


def test_populate_via_lat_long(geocoder_mock) -> None:
mailchimp = MailchimpEntry.mock(latitude="1.1", longitude="'4.2")
def test_populate_via_coordinates(geocoder_mock) -> None:
coordinates = Coordinates(latitude=1.1, longitude=4.2)
entry = SalesforceEntry.mock()
entry.populate_via_latitude_longitude(mailchimp, geocoder_mock)
entry.populate_via_coordinates(coordinates, geocoder_mock)
assert entry.city == "New York"
assert entry.state == "NY"
assert entry.country == "USA"
assert entry.zipcode == "11370"
assert entry.latitude == "1.1"
assert entry.longitude == "4.2"
assert entry.latitude == 1.1
assert entry.longitude == 4.2


@pytest.mark.parametrize(
Expand Down

0 comments on commit 564ab1e

Please sign in to comment.