-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path01_pipeline_ds_imdb.py
More file actions
39 lines (32 loc) · 1.23 KB
/
01_pipeline_ds_imdb.py
File metadata and controls
39 lines (32 loc) · 1.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from transformers import pipeline # Import the pipeline function from the transformers library
from datasets import load_dataset # Import the datasets library to load the IMDB dataset
import json # Import the json library to save results
import os # Import the os library to handle file operations
# Fetch the IMDB dataset through the datasets library
dataset = load_dataset(path="imdb")

# Build the sentiment-analysis pipeline that analyze_sentiment calls
sentiment_pipeline = pipeline(task="sentiment-analysis")
# Function to analyze sentiment
def analyze_sentiment(text):
    """Classify the sentiment of ``text`` with the module-level pipeline.

    Args:
        text: The text to classify.

    Returns:
        A ``(label, score)`` tuple read from the first prediction the
        pipeline returns for ``text``.
    """
    # Long reviews can exceed the model's maximum sequence length, which
    # makes the pipeline raise; truncation=True clips the input instead.
    result = sentiment_pipeline(text, truncation=True)
    return result[0]['label'], result[0]['score']
# Path of the JSON array file that receives the results
output_file = "output/sentiment_results.json"

# Discard results left behind by an earlier run
if os.path.exists(output_file):
    os.remove(output_file)

# Classify the first five reviews of the test split
results = []
for index in range(5):
    text = dataset['test'][index]['text']
    label, confidence = analyze_sentiment(text)
    results.append({"review": text, "sentiment": label, "score": confidence})

# Make sure the destination directory is present before writing
output_dir = os.path.dirname(output_file)
os.makedirs(output_dir, exist_ok=True)

# Serialize the collected results as an indented JSON array
with open(output_file, 'w') as handle:
    handle.write(json.dumps(results, indent=4))