-
Notifications
You must be signed in to change notification settings - Fork 0
/
aws_re_invent.py
220 lines (185 loc) · 9.57 KB
/
aws_re_invent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
import datetime
import re
import sys
import time
from collections import namedtuple
from itertools import groupby
from bs4 import BeautifulSoup
from selenium.common.exceptions import NoSuchElementException
from bash_color import BashColor
from browser_handler import BrowserHandler
from console_output import print_sessions, print_day_schedule
from file_impex import load_sessions_from_csv, save_sessions_to_csv, save_sessions_to_ical
class AWSreInvent:
    """Load, filter and print the favourite sessions of an AWS re:Invent account.

    Sessions are either scraped from the re:Invent portal through a browser
    provided by ``BrowserHandler`` or, when ``args.file`` is set, loaded from
    a CSV file via ``load_sessions_from_csv``.

    NOTE(review): the original file reached review with its indentation
    stripped; the nesting below was reconstructed from the statement order
    and should be confirmed against the upstream source.
    """

    # Page listing the sessions the logged-in user marked as favourites.
    aws_re_invent_catalog_favorites = "https://www.portal.reinvent.awsevents.com/connect/interests.ww"

    _LOGIN_FORM_SELECTOR = "//form[@id='loginForm']"
    LOGIN_USERNAME_SELECTOR = _LOGIN_FORM_SELECTOR + "//input[@id='loginUsername']"
    LOGIN_PASSWORD_SELECTOR = _LOGIN_FORM_SELECTOR + "//input[@id='loginPassword']"
    LOGIN_BUTTON_SELECTOR = _LOGIN_FORM_SELECTOR + "//input[@type='submit']"

    # Matches a single HTML tag.
    html_tag_regex = re.compile(r"<[^<]*>", re.IGNORECASE)
    # Matches an innermost element: opening tag, tag-free text, closing tag.
    html_tag_with_content_regex = re.compile(r"<[^<]*>[^<]*</[^<]*>", re.IGNORECASE)

    # Trailing label of the collapse link that ends up in the abstract text.
    _ABSTRACT_COLLAPSE_LABEL = 'View Less'

    # Record returned by ``_parse_session`` for every scraped session.
    Session = namedtuple(
        'Session',
        ['title', 'id', 'type', 'speakers', 'abstract', 'start', 'end', 'location', 'reserved']
    )

    def __init__(self, args):
        """Store the parsed command line arguments and start a browser if needed.

        :param args: argument namespace; the attributes read by this class are
            ``file``, ``username``, ``password``, ``ical``, ``day``, ``quiet``,
            ``verbose``, ``type``, ``speaker``, ``location``, ``abstract``,
            ``name`` and ``reserved``.
        """
        self.args = args
        # A browser (and a login) is only required when nothing is loaded from file.
        if self.args.file is None:
            self._init_browser()

    def _init_browser(self):
        """Create the browser through ``BrowserHandler`` and log in."""
        self.browser_handler = BrowserHandler(self.args)
        self.browser = self.browser_handler.browser
        self.login()

    def login(self):
        """Open the favourites page and submit the credentials until logged in.

        A missing login form element is retried with a growing delay and the
        ``NoSuchElementException`` is re-raised after more than 10 attempts.
        From the third attempt on, a still unsuccessful login terminates the
        program via ``_handle_login_unsuccessful``.
        """
        self.browser.get(self.aws_re_invent_catalog_favorites)
        time.sleep(1)
        self._handle_cookie_agreement_banner()
        attempt = 0
        while self._user_is_not_logged_in():
            attempt += 1
            try:
                self._insert_login_credentials()
                self._click_login_button()
            except NoSuchElementException:
                if attempt > 10:
                    raise
                # Back off a little longer on every failed attempt.
                time.sleep(attempt)
                continue
            if attempt > 2:
                self._handle_login_unsuccessful()

    def _handle_cookie_agreement_banner(self):
        """Accept the cookie agreement banner when the page shows one."""
        # NOTE(review): the ``find_element(s)_by_xpath`` helpers used in this
        # class were removed in Selenium 4.3 - confirm the pinned version.
        if self.browser.find_elements_by_xpath("//div[@id='cookieAgreementDisplayText']"):
            accept_button = self.browser.find_element_by_xpath("//input[@id='cookieAgreementAcceptButton']")
            accept_button.click()

    def _user_is_not_logged_in(self):
        """Return ``True`` while the current page source contains no 'logout'."""
        return 'logout' not in self.browser.page_source

    def _insert_login_credentials(self):
        """Fill the user name and password fields of the login form."""
        username_field = self.browser.find_element_by_xpath(self.LOGIN_USERNAME_SELECTOR)
        username_field.clear()
        username_field.send_keys(self.args.username)
        password_field = self.browser.find_element_by_xpath(self.LOGIN_PASSWORD_SELECTOR)
        password_field.clear()
        password_field.send_keys(self.args.password)

    def _click_login_button(self):
        """Submit the login form and wait for the next page."""
        self.browser.find_element_by_xpath(self.LOGIN_BUTTON_SELECTOR).click()
        time.sleep(2)  # wait for page to load

    def _handle_login_unsuccessful(self):
        """Exit with status 1 if the user is still not logged in after a short wait."""
        time.sleep(1)
        if self._user_is_not_logged_in():
            sys.stderr.write("Login to AWS re:Invent page failed.")
            # Flush the stream that was actually written to.
            sys.stderr.flush()
            self.browser_handler.kill()
            sys.exit(1)

    def handle_sessions(self):
        """Load the sessions, optionally export them and print them grouped by day.

        Sessions come from ``args.file`` when given; otherwise they are scraped
        and saved with ``save_sessions_to_csv``. The entries of
        ``special_events.csv`` (resolved relative to the working directory)
        are always appended after the optional iCal export.
        """
        if self.args.file:
            sessions = load_sessions_from_csv(self.args.file)
        else:
            sessions = self.parse_sessions()
            save_sessions_to_csv(sessions)
        if self.args.ical:
            save_sessions_to_ical(sessions)

        sessions = sessions + load_sessions_from_csv('special_events.csv')
        # groupby only merges adjacent items, so sort chronologically first.
        sessions.sort(key=lambda session: session.start)
        sessions_by_day = groupby(sessions, lambda session: session.start.strftime('%A, %Y-%m-%d'))

        for day, group in sessions_by_day:
            day_sessions = list(group)
            day_selected = self.args.day is None or self.args.day.lower() in day.lower()
            if not day_selected or self.args.quiet:
                continue
            filtered_sessions = self.filter_sessions_by_arguments(day_sessions)
            print()
            print(
                BashColor.BOLD +
                '### {day} [showing {filtered_size} of {day_size} total items] #################################################'.format(
                    day=day,
                    filtered_size=len(filtered_sessions),
                    day_size=len(day_sessions)
                ) + BashColor.END)
            print()
            if self.args.verbose:
                print_sessions(filtered_sessions, self.args)
            else:
                print_day_schedule(filtered_sessions)

    @staticmethod
    def _speaker_names(session):
        """Return the speaker names of ``session`` as a list of strings.

        Scraped sessions carry a ``speakers`` list. A plain string and the
        singular attribute name ``speaker`` are accepted as well, because the
        shape of records loaded from CSV is not visible here.
        """
        speakers = getattr(session, 'speakers', None)
        if speakers is None:
            speakers = getattr(session, 'speaker', None)
        if not speakers:
            return []
        if isinstance(speakers, str):
            return [speakers]
        return [str(speaker) for speaker in speakers]

    def filter_sessions_by_arguments(self, sessions):
        """Return the sessions that match every filter set on ``self.args``.

        ``type`` must match exactly; ``speaker``, ``location``, ``abstract``
        and ``name`` are substring matches. All comparisons ignore case and
        surrounding whitespace of the argument. ``reserved`` keeps only
        sessions whose ``reserved`` attribute is truthy.

        :param sessions: list of session records
        :return: a list with the matching sessions, in the original order
        """
        if self.args.type:
            wanted_type = self.args.type.strip().lower()
            sessions = [session for session in sessions if wanted_type == session.type.lower()]
        if self.args.speaker:
            wanted_speaker = self.args.speaker.strip().lower()
            sessions = [
                session for session in sessions
                if any(wanted_speaker in name.lower() for name in self._speaker_names(session))
            ]
        if self.args.location:
            wanted_location = self.args.location.strip().lower()
            sessions = [session for session in sessions if wanted_location in session.location.lower()]
        if self.args.abstract:
            wanted_abstract = self.args.abstract.strip().lower()
            sessions = [session for session in sessions if wanted_abstract in session.abstract.lower()]
        if self.args.name:
            wanted_name = self.args.name.strip().lower()
            sessions = [session for session in sessions if wanted_name in session.title.lower()]
        if self.args.reserved:
            sessions = [session for session in sessions if session.reserved]
        return sessions

    def parse_sessions(self):
        """Scrape all sessions listed in the sessions tab of the favourites page.

        :return: list of ``Session`` records; rows without a schedule are skipped
        """
        self.browser.get(self.aws_re_invent_catalog_favorites)
        time.sleep(1)
        self._open_all_session_details()
        sessions_page = BeautifulSoup(self.browser.page_source, 'html.parser')
        sessions_tab = sessions_page.find('div', id='sessionsTab')
        sessions = []
        for session_row in sessions_tab.find_all('div', class_='resultRow'):
            session = self._parse_session(session_row)
            if session is not None:
                sessions.append(session)
        return sessions

    def _open_all_session_details(self):
        """Click every schedule and abstract expander on the current page."""
        schedule_expanders = self.browser.find_elements_by_xpath(
            "//div[@class='sessionTimes']/a[@class='expandSessionImg']"
        )
        for expander in schedule_expanders:
            expander.click()
        abstract_expanders = self.browser.find_elements_by_xpath(
            "//a[contains(@class, 'moreLink')]"
        )
        for expander in abstract_expanders:
            expander.click()

    @classmethod
    def _strip_view_less(cls, text):
        """Strip surrounding whitespace and one trailing 'View Less' label.

        ``str.rstrip(' View Less')`` would remove any trailing run of the
        characters in that string and thereby eat into the abstract itself,
        so the label is removed as a suffix instead.
        """
        text = text.strip()
        if text.endswith(cls._ABSTRACT_COLLAPSE_LABEL):
            text = text[:-len(cls._ABSTRACT_COLLAPSE_LABEL)].rstrip()
        return text

    def _parse_session(self, session_row):
        """Build a ``Session`` record from one result row.

        Static texts are read from the parsed row, schedule and room from the
        live browser using the row's element id.

        :param session_row: BeautifulSoup element of a ``resultRow`` div
        :return: a ``Session`` instance, or ``None`` when the row reports
            that no sessions are available
        """
        title = session_row.find('span', class_='title').get_text()
        # Drops the trailing dash/space separator that follows the abbreviation.
        session_id = session_row.find('span', class_='abbreviation').get_text().rstrip(' - ')
        session_type = session_row.find('small', class_='type').get_text()
        schedule_status = session_row.find('span', class_='scheduleStatus').get_text()
        reserved = 'reserved' in schedule_status.lower()
        html_element_id = session_row['id']
        speakers = self._parse_session_speakers(session_row)
        abstract = self._strip_view_less(session_row.find('span', class_='abstract').get_text())

        session_details = self.browser.find_element_by_xpath(
            "//div[@id='{element_id}']//ul".format(element_id=html_element_id)
        ).get_attribute('innerHTML')
        if "There aren't any available sessions at this time." in session_details:
            return None

        start, end = self._parse_session_datetime(session_details)
        location = self.browser.find_element_by_xpath(
            "//div[@id='{element_id}']//span[contains(@class, 'sessionRoom')]".format(element_id=html_element_id)
        ).text.lstrip('– ')
        return self.Session(
            title=title,
            id=session_id,
            type=session_type,
            speakers=speakers,
            abstract=abstract,
            start=start,
            end=end,
            location=location,
            reserved=reserved
        )

    def _parse_session_datetime(self, session_details):
        """Extract start and end of a session from its schedule markup.

        After removing innermost elements and tags the text has to look like
        ``Monday, Nov 27, 1:00 PM - 2:30 PM``. The text carries no year, so
        the current year is used. An end time earlier than the start time is
        treated as belonging to the following day.

        :param session_details: inner HTML of the session's schedule list
        :return: tuple ``(start, end)`` of naive ``datetime.datetime`` objects
        :raises ValueError: if the text does not have the expected layout
        """
        schedule_text = self.html_tag_with_content_regex.sub('', session_details)
        session_start, session_end_time = schedule_text.split(' - ')
        session_start_time = str(datetime.datetime.now().year) + ' ' + self.html_tag_regex.sub('', session_start)
        session_start_datetime = datetime.datetime.strptime(session_start_time, '%Y %A, %b %d, %I:%M %p')

        end_time_parts = session_end_time.split(' ')
        end_hour, end_minute = end_time_parts[0].split(':')
        # Substring test because the marker may still be followed by a closing tag.
        end_is_pm = 'PM' in end_time_parts[1].upper()
        session_end_datetime = session_start_datetime.replace(
            hour=self._convert_12_am_or_pm(end_is_pm, end_hour),
            minute=int(end_minute)
        )
        if session_end_datetime < session_start_datetime:
            # Only the end *time* is given; the session runs past midnight.
            session_end_datetime += datetime.timedelta(days=1)
        return session_start_datetime, session_end_datetime

    @staticmethod
    def _convert_12_am_or_pm(is_pm, hour):
        """Convert an hour of the 12-hour clock to the 24-hour clock.

        :param is_pm: ``True`` for PM, ``False`` for AM
        :param hour: hour as ``int`` or numeric string
        :return: the hour as ``int`` in the range of the 24-hour clock
        """
        hour = int(hour)
        if is_pm:
            return hour % 12 + 12
        # 12 AM is midnight; every other AM hour is unchanged.
        return 0 if hour == 12 else hour

    def _parse_session_speakers(self, session_row):
        """Return the speaker entries of a result row as a list of strings.

        The speakers element is split at ``<br/>``; the part after the last
        break is discarded, tabs, newlines and tags are removed from the rest.
        """
        speakers_markup = str(session_row.find('small', class_='speakers')).strip()
        return [
            self.html_tag_regex.sub('', entry.replace('\t', '').replace('\n', ''))
            for entry in speakers_markup.split('<br/>')[:-1]
        ]