Skip to content

Commit

Permalink
Gmail API pagination (#469)
Browse files Browse the repository at this point in the history
* Use pagination to fetch more than 100 Gmail messages at once

* Provide `paginate_messages` option to allow use of previous behavior
  • Loading branch information
mkupferman authored Feb 19, 2024
1 parent d2145b7 commit efe7409
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 9 deletions.
2 changes: 2 additions & 0 deletions docs/source/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,8 @@ The full set of configuration options are:
(Default: `https://www.googleapis.com/auth/gmail.modify`)
- `oauth2_port` - int: The TCP port for the local server to
listen on for the OAuth2 response (Default: `8080`)
- `paginate_messages` - bool: When `True`, fetch all applicable Gmail messages.
When `False`, only fetch up to 100 new messages per run (Default: `True`)
- `log_analytics`
- `client_id` - str: The app registration's client ID
- `client_secret` - str: The app registration's client secret
Expand Down
4 changes: 4 additions & 0 deletions parsedmarc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ def process_reports(reports_):
gmail_api_credentials_file=None,
gmail_api_token_file=None,
gmail_api_include_spam_trash=False,
gmail_api_paginate_messages=True,
gmail_api_scopes=[],
gmail_api_oauth2_port=8080,
log_file=args.log_file,
Expand Down Expand Up @@ -829,6 +830,8 @@ def process_reports(reports_):
gmail_api_config.get("token_file", ".token")
opts.gmail_api_include_spam_trash = \
gmail_api_config.getboolean("include_spam_trash", False)
opts.gmail_api_paginate_messages = \
gmail_api_config.getboolean("paginate_messages", True)
opts.gmail_api_scopes = \
gmail_api_config.get("scopes",
default_gmail_api_scope)
Expand Down Expand Up @@ -1098,6 +1101,7 @@ def process_reports(reports_):
token_file=opts.gmail_api_token_file,
scopes=opts.gmail_api_scopes,
include_spam_trash=opts.gmail_api_include_spam_trash,
paginate_messages=opts.gmail_api_paginate_messages,
reports_folder=opts.mailbox_reports_folder,
oauth2_port=opts.gmail_api_oauth2_port
)
Expand Down
34 changes: 25 additions & 9 deletions parsedmarc/mail/gmail.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,13 @@ def __init__(self,
scopes: List[str],
include_spam_trash: bool,
reports_folder: str,
oauth2_port: int):
oauth2_port: int,
paginate_messages: bool):
creds = _get_creds(token_file, credentials_file, scopes, oauth2_port)
self.service = build('gmail', 'v1', credentials=creds)
self.include_spam_trash = include_spam_trash
self.reports_label_id = self._find_label_id_for_label(reports_folder)
self.paginate_messages = paginate_messages

def create_folder(self, folder_name: str):
# Gmail doesn't support the name Archive
Expand All @@ -65,16 +67,30 @@ def create_folder(self, folder_name: str):
else:
raise e

def _fetch_all_message_ids(self, reports_label_id, page_token=None):
    """Yield the ID of every message carrying the given label.

    Requests one page of results at a time from the Gmail
    ``users.messages.list`` endpoint. Pages are followed in a loop rather
    than by recursive ``yield from`` so that the number of pages is not
    bounded by the interpreter's recursion limit.

    Args:
        reports_label_id: Label ID sent as the only entry of ``labelIds``.
        page_token: Token of the page to start from; ``None`` starts with
            the first page.

    Yields:
        The ``id`` field of each message entry, in the order the API
        returned them.
    """
    while True:
        results = (
            self.service.users()
            .messages()
            .list(
                userId="me",
                includeSpamTrash=self.include_spam_trash,
                labelIds=[reports_label_id],
                pageToken=page_token,
            )
            .execute()
        )
        # A response without a "messages" key is treated as an empty page.
        for message in results.get("messages", []):
            yield message["id"]

        # Stop after the first page when pagination is disabled, or once
        # the API stops handing out a continuation token.
        if not self.paginate_messages or "nextPageToken" not in results:
            return
        page_token = results["nextPageToken"]
def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
# Return the IDs of the messages labelled with ``reports_folder``.
# ``**kwargs`` is accepted but not used anywhere in this span.
# NOTE(review): this span comes from a rendered diff whose +/- markers were
# stripped. It appears to interleave the removed single-request
# implementation (everything through the first ``return``) with its
# replacement (the final ``return``) - confirm against the real file.
reports_label_id = self._find_label_id_for_label(reports_folder)
# One list request with no pageToken: only the first page of results is read.
results = self.service.users().messages()\
.list(userId='me',
includeSpamTrash=self.include_spam_trash,
labelIds=[reports_label_id]
)\
.execute()
# A response without a 'messages' key is treated as having no messages.
messages = results.get('messages', [])
return [message['id'] for message in messages]
# Collects every ID yielded by _fetch_all_message_ids into a list; as laid
# out here this line follows an unconditional return.
return [id for id in self._fetch_all_message_ids(reports_label_id)]

def fetch_message(self, message_id):
msg = self.service.users().messages()\
Expand Down

0 comments on commit efe7409

Please sign in to comment.