# TwitterFIFA.py
# Scrapes #WorldCup2022 tweets with snscrape, cleans the tweet text, labels
# each tweet with a Hugging Face sentiment model, and charts the label counts.
import re

import matplotlib.pyplot as plt
import pandas as pd
import snscrape.modules.twitter as sntwitter
from transformers import pipeline
# Build the search first, then gather rows from its result stream.
query = '#WorldCup2022 lang:en since:2022-11-20 until:2022-11-21'
q = sntwitter.TwitterSearchScraper(query)

# One list per tweet, in the same field order as the DataFrame columns below.
# Iteration ends as soon as the running index exceeds 1000.
tweets = []
for position, item in enumerate(q.get_items()):
    if position > 1000:
        break
    record = [
        item.user.username,
        item.date,
        item.likeCount,
        item.sourceLabel,
        item.content,
    ]
    tweets.append(record)

# Tabulate the rows and write the raw scrape to disk without the index column.
column_names = ["User", "Date", "Number of Likes", "Source of Tweet", "Tweet"]
tweetsdf = pd.DataFrame(tweets, columns=column_names)
tweetsdf.head()
tweetsdf.to_csv('fifa_2022_scrapped.csv', index=False)
# Hugging Face pipeline; only the model id is passed, no explicit task name.
model_id = "cardiffnlp/twitter-roberta-base-sentiment-latest"
sentiment_analysis = pipeline(model=model_id)
# Tweet preprocessing.
# Matches, in one pass: a leading "RT @user:" retweet marker, http(s)/www
# links, and @mentions / #hashtags that are not embedded in a longer word
# (so an address such as a@b.com is left alone).
_TWEET_NOISE_RE = re.compile(
    r"^\s*RT\s+@\w+:?|(?:https?://|www\.)\S+|(?<!\w)[@#]\w+",
    re.IGNORECASE,
)


def preprocess_tweet(row) -> str:
    """Return the ``'Tweet'`` text of *row* with tweet-specific noise removed.

    Retweet markers, links, @mentions and #hashtags are dropped, then runs of
    whitespace are collapsed to single spaces and the ends are trimmed.

    Args:
        row: A record that supports ``row['Tweet']`` lookup, e.g. the row
            handed over by ``DataFrame.apply(..., axis=1)``.

    Returns:
        The cleaned text, or ``""`` when the stored value is not a string.
    """
    # The earlier body called `p.clean(text)`, but nothing in this file binds
    # the name `p`, so it failed with NameError; the cleaning is done with
    # the standard-library `re` module instead.
    # NOTE(review): `p` presumably referred to a third-party tweet cleaner -
    # confirm this pattern removes everything the analysis expects.
    text = row['Tweet']
    if not isinstance(text, str):
        # Missing values (None / NaN) have nothing to clean.
        return ""
    stripped = _TWEET_NOISE_RE.sub(" ", text)
    # split() + join collapses whitespace runs and trims both ends.
    return " ".join(stripped.split())
# Run the row-wise cleaner over every scraped tweet.
tweetsdf['Tweet'] = tweetsdf.apply(preprocess_tweet, axis=1)
tweetsdf.head()
# Normalize the text: lowercase, turn every character that is neither a word
# character nor whitespace into a space, then squeeze runs of two or more
# whitespace characters down to one space.
# regex=True is required: without it newer pandas treats the patterns as
# literal text and nothing is replaced. Raw strings keep `\w` / `\s` from
# being read as (invalid) string escapes.
tweetsdf['Tweet'] = (
    tweetsdf['Tweet']
    .str.lower()
    .str.replace(r'[^\w\s]', ' ', regex=True)
    .str.replace(r'\s\s+', ' ', regex=True)
)
# The head()/tail() results are discarded; they only display in a notebook.
tweetsdf.head()
tweetsdf.tail()
tweetsdf.to_csv('fifa_2022_preprocessed.csv', index=False)
# Sentiment prediction: one classifier call per tweet, keeping the top label.
# NOTE(review): this iterates q.get_items() again instead of reusing tweetsdf,
# and it classifies the tweet's original content, not the preprocessed column.
tweetSA = []
for position, item in enumerate(q.get_items()):
    if position > 30000:
        break
    label = sentiment_analysis(item.content)[0]['label']
    record = {
        "Date": item.date,
        "Number of Likes": item.likeCount,
        "Source of Tweet": item.sourceLabel,
        "Tweet": item.content,
        'Sentiment': label,
    }
    tweetSA.append(record)
# Turn the collected per-tweet records into a table.
sadf = pd.DataFrame(data=tweetSA)
sadf.head()
# Count how many tweets received each sentiment label and print the tally.
by_sentiment = sadf.groupby(['Sentiment'])
senti_count = by_sentiment.size()
print(senti_count)
# Pie chart of the sentiment label counts on a single 7.5 x 7.5 inch axes.
# NOTE(review): nothing in this file shows or saves the figure - confirm
# whether it is meant to run in a notebook.
figure, ax = plt.subplots(figsize=(7.5, 7.5), dpi=100)
ax.set_title("FIFA 2022 SENTIMENT ANALYSIS", pad=20)
senti_count.plot.pie(
    ax=ax,
    autopct='%1.2f%%',
    startangle=270,
    fontsize=12,
    label="",
)
# Persist the per-tweet sentiment table as CSV, leaving out the index column.
analysis_csv = 'fifa_2022_analysis.csv'
sadf.to_csv(analysis_csv, index=False)