# Scraper through ID.py
# Fetches one YouTube video's metadata and top-level comments by video ID
# and appends them as a row to scraper.csv.
from apiclient.discovery import build
from apiclient.errors import HttpError
from oauth2client.tools import argparser
import pafy
import csv
import sys
# Python 2 only: reload sys and make UTF-8 the interpreter-wide default codec
# for implicit str/unicode conversions.
# NOTE(review): presumably needed so non-ASCII titles/comments can be written
# by the csv module below -- confirm before removing.
reload(sys)
sys.setdefaultencoding('utf8')
# Key and service identifiers handed to build() for the YouTube Data API
# client. The key is a placeholder and must be replaced before running.
DEVELOPER_KEY = "#AddYourDeveloperKey"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
# pafy is given its own API key, separately from the client built with
# DEVELOPER_KEY; this is also a placeholder.
pafy.set_api_key("#AddYourAPIKey")
def add_data(vID, title, description, author, published, viewcount, duration, likes, dislikes, rating, category, comments):
    """Append one video's metadata and comments as a single row of scraper.csv.

    The twelve arguments are written, in the order given, as the twelve
    columns of one row using the csv 'excel' dialect. Values that are not
    strings (numbers, the ``comments`` list) are stringified by the csv
    writer, so ``comments`` ends up in one cell as the text form of the list.

    The file is created in the current working directory if it does not
    exist; existing rows are never overwritten.
    """
    row = [vID, title, description, author, published, viewcount,
           duration, likes, dislikes, rating, category, comments]

    # The csv writer emits its own "\r\n" line terminator. The file must
    # therefore be opened so the platform does not translate newlines again
    # (binary mode on Python 2, newline="" on Python 3); otherwise Windows
    # output gets "\r\r\n" and shows a blank line after every row.
    if sys.version_info[0] < 3:
        fp = open("scraper.csv", "ab")
    else:
        fp = open("scraper.csv", "a", newline="")

    with fp:
        csv.writer(fp, dialect='excel').writerow(row)
# Consecutive failed page requests tolerated before paging is abandoned.
MAX_PAGE_RETRIES = 3


def _request_comment_page(service, video_id, page_token=None):
    """Fetch one page (at most 100 threads) of plain-text comment threads.

    ``page_token`` is left out of the request for the first page. An
    HttpError raised by the API client propagates to the caller.
    """
    params = {
        "part": "snippet",
        "maxResults": 100,
        "videoId": video_id,
        "textFormat": "plainText",
    }
    if page_token:
        params["pageToken"] = page_token
    return service.commentThreads().list(**params).execute()


def _author_text_pairs(page):
    """Return [author, text] for the top-level comment of each thread in a page."""
    pairs = []
    for item in page["items"]:
        snippet = item["snippet"]["topLevelComment"]["snippet"]
        pairs.append([snippet["authorDisplayName"], snippet["textDisplay"]])
    return pairs


youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)
videoId = raw_input("ID of youtube video : \n")
url = "https://www.youtube.com/watch?v=" + videoId

# Request for metadata of the video
video = pafy.new(url)

# Request for comments. The first page is fetched unguarded: if it fails
# there is nothing to save, so the error is allowed to abort the script.
results = _request_comment_page(youtube, videoId)

comments = []
nextPageToken = ''
failed_attempts = 0
while True:
    # results is None only right after a failed request, in which case the
    # same page token is simply tried again below.
    if results is not None:
        comments.extend(_author_text_pairs(results))
        # The API omits nextPageToken on the last page; that, not the size
        # of the page, is what marks the end of the listing.
        nextPageToken = results.get("nextPageToken")
        if not nextPageToken:
            break

    print(".")
    try:
        results = _request_comment_page(youtube, videoId, nextPageToken)
        failed_attempts = 0
    except HttpError as e:
        print("An HTTP error %d occurred:\n%s" % (e.resp.status, e.content))
        results = None
        failed_attempts += 1
        # A persistent error (quota, disabled comments, ...) must not spin
        # forever; keep what was collected so far and move on.
        if failed_attempts >= MAX_PAGE_RETRIES:
            break

# Adding the full data to CSV
add_data(videoId, video.title, video.description, video.author, video.published,
         video.viewcount, video.duration, video.likes, video.dislikes,
         video.rating, video.category, comments)