-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathKEYhurricanehelenesubreddit_withfilters.py
More file actions
100 lines (85 loc) · 4.74 KB
/
KEYhurricanehelenesubreddit_withfilters.py
File metadata and controls
100 lines (85 loc) · 4.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import praw
import pandas as pd
import time
# Define keywords that indicate relevance to Hurricane Helene
positive_emotions_keywords = [
'happy', 'joyful', 'grateful', 'thankful', 'hopeful', 'relieved',
'excited', 'optimistic', 'content', 'satisfied', 'peaceful',
'cheerful', 'delighted', 'elated', 'inspired', 'confident',
'proud', 'lucky', 'blessed', 'euphoric', 'joyous', 'appreciative',
'calm', 'thankful', 'comfortable', 'grinning', 'smiling', 'motivated',
'uplifted', 'refreshed', 'relaxed', 'good', 'glad'
]
negative_emotions_keywords = [
'sad', 'angry', 'frustrated', 'fearful', 'scared', 'hopeless', 'anxious',
'depressed', 'disappointed', 'betrayed', 'guilty', 'hurt', 'nervous',
'lonely', 'ashamed', 'heartbroken', 'desperate', 'grief', 'angst', 'upset',
'devastated', 'terrified', 'dismayed', 'overwhelmed', 'despair',
'miserable', 'distressed', 'shocked', 'helpless', 'vulnerable', 'isolated',
'stressed', 'worried', 'frightened', 'discouraged', 'regretful', 'lost',
'doom', 'bad', 'hate', 'nightmare'
]
# Neutral emotions/feelings keywords
neutral_emotions_keywords = [
'neutral', 'indifferent', 'calm', 'bored', 'apathetic', 'indifferent',
'detached', 'reserved', 'unmoved', 'unconcerned', 'unaffected',
'unenthusiastic', 'unimpressed', 'moderate', 'nonchalant', 'passive',
'unperturbed', 'balanced', 'steady', 'unexcited', 'uninvolved'
]
# Helene keywords
relevant_helene_keywords = ['helene', 'storm helene', 'evacuate', 'evacuation', 'aftermath', 'damage', 'tropical storm', 'destruction', 'damage', 'power outages', 'debris', 'power lines down', 'recovery', 'rescue', 'relief', 'shelter', 'hurricane helene', 'impact', 'effects', 'recovery', 'mental health', 'experience', 'help', 'community', 'support']
# Initialize the Reddit API client
reddit = praw.Reddit(
client_id='R56d6atFieHcJoZqQDQQmQ', # Replace with your client ID
client_secret='NajuFmUCAu8FREQww0u__ag53_m0KA', # Replace with your client secret
user_agent='script:my_hurricane_scraper:v1.0 (by u/sjanesasha)'
)
# Helper functions
def contains_keyword(text, keywords):
return any(keyword.lower() in text.lower() for keyword in keywords)
def get_sentiment(text):
if contains_keyword(text, negative_emotions_keywords):
return 'Negative'
elif contains_keyword(text, positive_emotions_keywords):
return 'Positive'
else:
return 'Neutral'
# Data storage
titles_data, posts_data, comments_data = [], [], []
# Fetch and process posts from different filters
filters = [
lambda: reddit.subreddit('HurricaneHelene').new(limit=1000),
lambda: reddit.subreddit('HurricaneHelene').hot(limit=1000),
lambda: reddit.subreddit('HurricaneHelene').top(time_filter='month', limit=1000),
lambda: reddit.subreddit('HurricaneHelene').rising(limit=1000)
]
for fetch_posts in filters:
print(f"Processing posts from filter: {fetch_posts.__name__}") # Print which filter is being processed
try:
for post in fetch_posts():
print(f"Processing post: {post.title}") # Print post title being processed
# Process post title
if contains_keyword(post.title, relevant_helene_keywords):
sentiment = get_sentiment(post.title)
titles_data.append({'Text': post.title, 'Sentiment': sentiment, 'Type': 'Title', 'Author': str(post.author)})
print(f"Processed title sentiment: {sentiment}") # Print sentiment for the title
# Process post content (selftext)
if contains_keyword(post.selftext, relevant_helene_keywords):
sentiment = get_sentiment(post.selftext)
posts_data.append({'Text': post.selftext, 'Sentiment': sentiment, 'Type': 'Content', 'Author': str(post.author)})
print(f"Processed post content sentiment: {sentiment}") # Print sentiment for the content
# Process comments
post.comments.replace_more(limit=0)
for comment in post.comments.list():
if contains_keyword(comment.body, relevant_helene_keywords):
sentiment = get_sentiment(comment.body)
comments_data.append({'Text': comment.body, 'Sentiment': sentiment, 'Type': 'Comment', 'Author': str(comment.author)})
print(f"Processed comment sentiment: {sentiment}") # Print sentiment for the comment
time.sleep(1) # Respect Reddit's API rate limits
except Exception as e:
print(f"Error occurred: {e}")
# Save results to CSV
pd.DataFrame(titles_data).to_csv('filtered_helene_titles.csv', index=False)
pd.DataFrame(posts_data).to_csv('filtered_helene_posts.csv', index=False)
pd.DataFrame(comments_data).to_csv('filtered_helene_comments.csv', index=False)
print("Data saved successfully!")