-
Notifications
You must be signed in to change notification settings - Fork 0
/
lambda_demo.py
98 lines (73 loc) · 3.11 KB
/
lambda_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Importing necessary libraries for handling data, making HTTP requests, dealing with dates, and interacting with AWS S3
import json
import pandas as pd
import json
import pandas as pd
import time
import requests
import random
import time
import boto3
from datetime import datetime
import datetime
import os
# Module-level S3 client, created once at import time and used by lambda_handler to read the channel list
s3_client = boto3.client('s3')
# Fetches the current statistics of a single YouTube channel from the YouTube Data API
def get_stats(api_key, channel_id, session=None, timeout=10):
    """Fetch the current public statistics of one YouTube channel.

    Queries the YouTube Data API v3 ``channels`` endpoint with
    ``part=statistics`` and returns the counters together with today's date.

    Args:
        api_key: YouTube Data API key, sent as the ``key`` query parameter.
        channel_id: ID of the channel to look up.
        session: Optional object exposing a ``requests``-compatible
            ``get(url, params=..., timeout=...)`` method, e.g. a
            ``requests.Session`` shared between calls. Defaults to the
            ``requests`` module itself.
        timeout: Seconds to wait for the API. ``requests`` applies no timeout
            unless one is given, so a stalled connection would otherwise
            block indefinitely.

    Returns:
        dict with keys ``Date`` (``YYYY-MM-DD`` string), ``Total_Views``,
        ``Subscribers`` and ``Video_count`` (ints).

    Raises:
        KeyError: If the response contains no channel for ``channel_id`` or
            the statistics are missing one of the expected counters.
        Exception: Whatever the HTTP client raises for connection failures
            or, via ``raise_for_status()``, for non-success status codes.
    """
    http = requests if session is None else session
    # Let the HTTP client build the query string so every value is URL-encoded.
    response = http.get(
        'https://youtube.googleapis.com/youtube/v3/channels',
        params={'part': 'statistics', 'id': channel_id, 'key': api_key},
        timeout=timeout,
    )
    response.raise_for_status()

    items = response.json().get('items')
    if not items:
        # The response carries no usable "items" entry for this id.
        raise KeyError(f'no channel statistics returned for channel id {channel_id!r}')
    channel_stats = items[0]['statistics']

    return {
        'Date': pd.to_datetime('today').strftime("%Y-%m-%d"),
        'Total_Views': _count_to_int(channel_stats['viewCount']),
        'Subscribers': _count_to_int(channel_stats['subscriberCount']),
        'Video_count': _count_to_int(channel_stats['videoCount']),
    }


def _count_to_int(value):
    """Convert an API counter to int without rounding large values through float."""
    try:
        return int(value)
    except ValueError:
        # Keep accepting float-formatted strings such as "42.0".
        return int(float(value))
# Gathers statistics for multiple YouTube channels listed in a DataFrame.
def channels_stats(df, api_key, delays=(1, 2.5, 2)):
    """Collect the current statistics of every channel listed in ``df``.

    Args:
        df: DataFrame with ``Channel_id`` and ``Channel_name`` columns, one
            row per channel.
        api_key: YouTube Data API key, forwarded to ``get_stats``.
        delays: Candidate pause lengths in seconds; one is picked at random
            between consecutive API calls. An empty sequence disables the
            pause.

    Returns:
        pandas.DataFrame with one row per input row and the columns
        ``Channel_name``, ``Subscribers``, ``Video_count``, ``Total_Views``
        and ``Createt_at``, in that order.
    """
    # 'Createt_at' (sic) is kept unchanged: it becomes the CSV header and
    # downstream consumers may rely on it.
    columns = ['Channel_name', 'Subscribers', 'Video_count', 'Total_Views', 'Createt_at']
    rows = []
    # Walk the two columns by position rather than by index label, so a
    # filtered or re-indexed DataFrame is handled as well.
    pairs = zip(df['Channel_id'], df['Channel_name'])
    for position, (channel_id, channel_name) in enumerate(pairs):
        # Pause only between requests; there is nothing to throttle before
        # the first call or after the last one.
        if position and delays:
            time.sleep(random.choice(delays))
        stats = get_stats(api_key, channel_id)
        rows.append({
            'Channel_name': channel_name,
            'Subscribers': stats['Subscribers'],
            'Video_count': stats['Video_count'],
            'Total_Views': stats['Total_Views'],
            'Createt_at': stats['Date'],
        })
    return pd.DataFrame(rows, columns=columns)
# AWS Lambda function handler to fetch YouTube channels' statistics and store them in AWS S3.
def lambda_handler(event, context):
    """AWS Lambda entry point: snapshot channel statistics into S3 as a CSV.

    Reads the channel list CSV named by ``FILE_CHANNELS`` from the bucket
    ``BUCKET``, gathers statistics for every listed channel with
    ``channels_stats`` (using the ``APIKEY`` API key) and uploads the result
    as ``youtube_stats_<YYYYMMDD>.csv`` to the bucket ``BUCKET_DESTINY``.

    Args:
        event: Lambda invocation event (not used).
        context: Lambda runtime context (not used).

    Returns:
        str: Confirmation message naming the uploaded file.

    Raises:
        KeyError: If one of the environment variables ``BUCKET``,
            ``FILE_CHANNELS``, ``APIKEY`` or ``BUCKET_DESTINY`` is not set.
    """
    # Read every setting up front so a missing variable fails before any
    # S3 or YouTube API call is made.
    bucket_name = os.environ['BUCKET']
    filename = os.environ['FILE_CHANNELS']
    developer_key = os.environ['APIKEY']
    destination_bucket = os.environ['BUCKET_DESTINY']

    # Fetching the channels list file from S3 and reading it into a DataFrame
    obj = s3_client.get_object(Bucket=bucket_name, Key=filename)
    df_channels = pd.read_csv(obj['Body'])

    # Generating statistics for all channels
    results = channels_stats(df_channels, developer_key)

    date = pd.to_datetime('today').strftime("%Y%m%d")
    object_key = f'youtube_stats_{date}.csv'
    local_path = f'/tmp/{object_key}'

    try:
        # Saving the results to a temporary CSV file and uploading it with
        # the module-level client instead of creating a second S3 resource.
        results.to_csv(local_path, index=False)
        s3_client.upload_file(Filename=local_path, Bucket=destination_bucket, Key=object_key)
    finally:
        # Remove the temporary file even when writing or uploading failed.
        try:
            os.remove(local_path)
        except FileNotFoundError:
            # Nothing was written, so there is nothing to clean up.
            pass

    return f'file {object_key} send succeded'