-
Notifications
You must be signed in to change notification settings - Fork 1
/
SentimentsAnalysis.py
206 lines (178 loc) · 7.78 KB
/
SentimentsAnalysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
## Sentiment analysis in Python applied to French political speeches
## Lexicons (positive and negative words) used: Chen, Y., & Skiena, S. (2014). Building Sentiment Lexicons for All Major Languages. In ACL (2) (pp. 383-389).
import matplotlib.pyplot as plt
import numpy as np
class Speech:
    """Sentiment summary of one speech.

    Attributes:
        name: name of the speaker.
        year: year of the speech.
        nbPositiveWords: number of positive words.
        nbNegativeWords: number of negative words.
        nbTotalWords: total number of words.
        ratio: (positive - negative) / total * 100, or 0.0 when the
            speech contains no words.
    """

    def __init__(self, name, year, nbPositiveWords, nbNegativeWords, nbTotalWords):
        self.name = name
        self.year = year
        self.nbPositiveWords = nbPositiveWords
        self.nbNegativeWords = nbNegativeWords
        self.nbTotalWords = nbTotalWords
        # Normalise by the number of words because some speeches are longer
        # than others. An empty speech has no sentiment to measure, so it
        # gets a neutral ratio instead of raising ZeroDivisionError.
        if nbTotalWords:
            self.ratio = (nbPositiveWords - nbNegativeWords) / nbTotalWords * 100
        else:
            self.ratio = 0.0
def getLexicons(positiveList, negativeList):
    """Append the words of both lexicon files to the given lists.

    Reads 'positive_words_fr.txt' and 'negative_words_fr.txt' from the
    current working directory, one word per line.

    Args:
        positiveList: list extended in place with the positive words.
        negativeList: list extended in place with the negative words.

    Returns:
        The same two lists, as a (positiveList, negativeList) tuple.

    Raises:
        OSError: if either lexicon file cannot be opened.
    """
    # `with` closes each file even if reading fails part-way through.
    with open('positive_words_fr.txt', 'r') as positiveLex:
        # only the trailing line break is removed from each line
        positiveList.extend(line.rstrip("\n") for line in positiveLex)
    with open('negative_words_fr.txt', 'r') as negativeLex:
        negativeList.extend(line.rstrip("\n") for line in negativeLex)
    return positiveList, negativeList
def getSpeechAnalysis(speechName, name, year, positiveList, negativeList):
    """Count the positive and negative words of one speech file.

    Args:
        speechName: path of the speech file (ex 'speech/Macron2018_voeux.txt').
        name: name of the speaker.
        year: year of the speech.
        positiveList: positive lexicon (iterable of words).
        negativeList: negative lexicon (iterable of words).

    Returns:
        A Speech holding the speaker, the year, both counts and the total
        number of words read from the file.

    Raises:
        OSError: if the speech file cannot be opened.
    """
    # Sets make each lookup O(1) and, unlike `list.count(word) == 1`, still
    # recognise a word that the lexicon happens to list more than once.
    positiveSet = set(positiveList)
    negativeSet = set(negativeList)

    speechList = []  # every word of the speech, lowercased
    with open(speechName, 'r') as speech:  # closed even if reading fails
        for line in speech:
            for element in line.split():
                # drop one trailing point or comma stuck to the word
                if element.endswith(('.', ',')):
                    element = element[:-1]
                # lowercase to ease the lexicon lookup
                speechList.append(element.lower())

    positiveWords = sum(1 for word in speechList if word in positiveSet)
    negativeWords = sum(1 for word in speechList if word in negativeSet)

    return Speech(name, year, positiveWords, negativeWords, len(speechList))
## Initialization
# getLexicons fills the lists it receives and hands the same lists back,
# so both lexicons can be created and loaded in a single statement.
positiveList, negativeList = getLexicons([], [])
# List of all the speeches as [speaker, year, speech file name],
# listed from the most recent year to the oldest.
# If you want to add a speech, add it at the end of the array.
# The speeches must stay grouped by speaker: you can't add a Macron speech
# between two Hollande speeches, for instance.
Files = [
    # ['Name', year, 'speechfile'],
    ['Macron', 2018, 'Macron2018_voeux.txt'],
    ['Hollande', 2017, 'Hollande2017_voeux.txt'],
    ['Hollande', 2016, 'Hollande2016_voeux.txt'],
    ['Hollande', 2015, 'Hollande2015_voeux.txt'],
    ['Hollande', 2014, 'Hollande2014_voeux.txt'],  # was pointing at the 2017 file
    ['Hollande', 2013, 'Hollande2013_voeux.txt'],
    ['Sarkozy', 2012, 'Sarkozy2012_voeux.txt'],
    ['Sarkozy', 2011, 'Sarkozy2011_voeux.txt'],
    ['Sarkozy', 2010, 'Sarkozy2010_voeux.txt'],
    ['Sarkozy', 2009, 'Sarkozy2009_voeux.txt'],
    ['Sarkozy', 2008, 'Sarkozy2008_voeux.txt'],
    ['Chirac', 2007, 'Chirac2007_voeux.txt'],
    ['Chirac', 2006, 'Chirac2006_voeux.txt'],
    ['Chirac', 2005, 'Chirac2005_voeux.txt'],
    ['Chirac', 2004, 'Chirac2004_voeux.txt'],
    ['Chirac', 2003, 'Chirac2003_voeux.txt'],
    ['Chirac', 2002, 'Chirac2002_voeux.txt'],
    ['Chirac', 2001, 'Chirac2001_voeux.txt'],
    ['Chirac', 2000, 'Chirac2000_voeux.txt'],
    ['Chirac', 1999, 'Chirac1999_voeux.txt'],
    ['Chirac', 1998, 'Chirac1998_voeux.txt'],
    ['Chirac', 1997, 'Chirac1997_voeux.txt'],
    ['Chirac', 1996, 'Chirac1996_voeux.txt'],  # was pointing at the 1999 file
    ['Mitterand', 1995, 'Mitterand1995_voeux.txt'],
    ['Mitterand', 1994, 'Mitterand1994_voeux.txt'],
    ['Mitterand', 1993, 'Mitterand1993_voeux.txt'],
    ['Mitterand', 1992, 'Mitterand1992_voeux.txt'],
    ['Mitterand', 1991, 'Mitterand1991_voeux.txt'],
    ['Mitterand', 1990, 'Mitterand1990_voeux.txt'],
    ['Mitterand', 1989, 'Mitterand1989_voeux.txt'],
    ['Mitterand', 1988, 'Mitterand1988_voeux.txt'],
    ['Mitterand', 1987, 'Mitterand1987_voeux.txt'],
    ['Mitterand', 1986, 'Mitterand1986_voeux.txt'],
    ['Mitterand', 1985, 'Mitterand1985_voeux.txt'],
    ['Mitterand', 1984, 'Mitterand1984_voeux.txt'],
    ['Mitterand', 1983, 'Mitterand1983_voeux.txt'],
    ['Mitterand', 1982, 'Mitterand1982_voeux.txt'],
    ['Giscard', 1981, 'Giscard1981_voeux.txt'],
    ['Giscard', 1979, 'Giscard1979_voeux.txt'],
    ['Giscard', 1975, 'Giscard1975_voeux.txt'],
]
## Results
# number of speeches to analyse
sizeOfFiles = len(Files)
# Analysed speeches, in the reverse of the Files order (Files is browsed
# from its end to its beginning).
# The path uses forward slashes: the former '.\speech\\' literal contained
# the invalid escape sequence '\s' and only resolved on Windows.
speeches = [
    getSpeechAnalysis('./speech/' + entry[2], entry[0], entry[1], positiveList, negativeList)
    for entry in reversed(Files)
]
# Group the speeches by speaker.
# Result receives one [speaker name, list of ratios, list of years] entry per
# run of consecutive speeches sharing the same speaker.
Result = []
nameOfSpeaker = ""  # empty until the first speech has been seen
ratio = []
year = []
iFiles = 0  # number of speeches processed so far
# speeches holds exactly sizeOfFiles entries, so iterating over it directly
# visits the same items as indexing from 0 to sizeOfFiles - 1.
for iFiles, speech in enumerate(speeches, start=1):
    if speech.name != nameOfSpeaker:
        # the speaker changes: store the finished group (if there is one) ...
        if nameOfSpeaker:
            Result.append([nameOfSpeaker, ratio, year])
        # ... and start collecting for the new speaker
        nameOfSpeaker = speech.name
        ratio = []
        year = []
    ratio.append(speech.ratio)
    year.append(speech.year)
# the last group is still pending when the loop ends
Result.append([nameOfSpeaker, ratio, year])
# Flatten the per-speaker ratio lists into a single list, in Result order.
Ratio = [value for entry in Result for value in entry[1]]
# Moving average over each ratio and the two ratios before it, together with
# a lower and an upper limit placed 0.5 below and above it.
movingAverage = [
    (first + second + third) / 3
    for first, second, third in zip(Ratio, Ratio[1:], Ratio[2:])
]
movingAverageInf = [mean - 0.5 for mean in movingAverage]
movingAverageSup = [mean + 0.5 for mean in movingAverage]
# Years matching the moving average: the first two speeches have no value.
yearsMovingAv = [speech.year for speech in speeches[2:sizeOfFiles]]
# First figure: moving average (solid), its two limits (dashed) and one
# scatter series per speaker.
plt.figure(1)
plt.plot(yearsMovingAv, movingAverage, 'b', label='Moving Average')
for limit in (movingAverageInf, movingAverageSup):
    plt.plot(yearsMovingAv, limit, 'b--')
color = ['magenta', 'black', 'yellow', 'blue', 'orange', 'green', 'red', 'purple', 'brown', 'pink', 'gray', 'olive', 'cyan']
for index, (speaker, ratios, years) in enumerate(Result):
    # the modulo restarts from the first colour once the palette is used up
    plt.scatter(years, ratios, c=color[index % len(color)], label=speaker)
plt.legend()
plt.title('Positiveness ratio in speeches depending on the year')
plt.show()
# Second figure: one box plot of the ratios per speaker.
RatioList = [entry[1] for entry in Result]
# Tick labels are built from Result itself, so there is exactly one label per
# box. They were previously written over the default tick labels of an empty
# axes, which only worked when the number of speakers happened to equal the
# number of default ticks.
labels = [entry[0] for entry in Result]
fig, ax = plt.subplots()
ax.boxplot(RatioList)
ax.set_xticklabels(labels)
plt.title('Positiveness ratio in speeches')
plt.show()