Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 61 additions & 39 deletions api.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,26 @@
from flask import Flask, request, jsonify, send_file, render_template
import re
from io import BytesIO

# nltk.download('stopwords')
from io import BytesIO, StringIO # Import StringIO
import nltk
# nltk.download('stopwords') # This is only needed once, not on every run.
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import base64
from flask_cors import CORS


# English stop words, built once as a set for fast membership tests.
STOPWORDS = set(stopwords.words("english"))

app = Flask(__name__)
CORS(app)


def _load_pickle(path):
    """Return the object unpickled from *path*, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code, so these files must only
    # ever come from a trusted source.
    with open(path, "rb") as fh:
        return pickle.load(fh)


# Load the model, scaler and vectorizer once at import time so the request
# handlers reuse them instead of re-reading the pickles on every call.
predictor = _load_pickle("Models/model_xgb.pkl")
scaler = _load_pickle("Models/scaler.pkl")
cv = _load_pickle("Models/countVectorizer.pkl")


@app.route("/test", methods=["GET"])
Expand All @@ -27,66 +35,76 @@ def home():

@app.route("/predict", methods=["POST"])
def predict():
    """Predict sentiment for an uploaded file or for a single JSON text.

    Two request shapes are handled:

    * a multipart upload with a ``file`` part: the file is parsed, every row
      is classified by ``bulk_prediction`` and the augmented table is sent
      back as ``Predictions.csv``; the distribution chart travels
      base64-encoded in the ``X-Graph-Data`` response header;
    * a JSON body with a ``text`` key: the text is classified by
      ``single_prediction`` and returned as ``{"prediction": ...}``.

    Returns:
        A Flask response. Failures are reported as ``{"error": ...}`` with
        status 400 for unusable requests and 500 for unexpected errors.
    """
    try:
        if "file" in request.files:
            file = request.files["file"]
            if not file.filename:
                return jsonify({"error": "No selected file"}), 400

            # The upload is read as tab-separated text; quoting=3 is
            # csv.QUOTE_NONE, so quote characters are kept as literal text.
            data = pd.read_csv(file, delimiter="\t", quoting=3)

            # bulk_prediction adds the "Predicted sentiment" column to `data`
            # itself, so `data` is what gets serialised below.
            _, graph = bulk_prediction(data)

            csv_bytes = data.to_csv(index=False).encode("utf-8")
            response = send_file(
                BytesIO(csv_bytes),
                mimetype="text/csv",
                as_attachment=True,
                download_name="Predictions.csv",
            )
            response.headers["X-Graph-Exists"] = "true"
            # NOTE(review): a whole PNG in a header can exceed header size
            # limits of proxies/servers; kept because the page reads it here.
            response.headers["X-Graph-Data"] = base64.b64encode(
                graph.getvalue()
            ).decode("ascii")
            return response

        # silent=True yields None instead of raising when the body is not
        # JSON (for example a form post without a file part).
        payload = request.get_json(silent=True)
        if isinstance(payload, dict) and "text" in payload:
            predicted_sentiment = single_prediction(payload["text"])
            return jsonify({"prediction": predicted_sentiment})

        return jsonify({"error": "No file or text provided"}), 400

    except Exception as e:
        # Top-level boundary of the endpoint: log the traceback and report
        # the failure to the client instead of letting the request crash.
        app.logger.exception("Error in /predict")
        return jsonify({"error": str(e)}), 500


def single_prediction(text_input):
    """Return ``"Positive"`` or ``"Negative"`` for one piece of text.

    The text is reduced to letters, lower-cased, stripped of stop words and
    stemmed; the result is vectorised with the module-level ``cv``, scaled
    with ``scaler`` and classified with ``predictor``.

    Args:
        text_input: The raw text to classify.

    Returns:
        ``"Positive"`` when the most probable class index is 1, otherwise
        ``"Negative"``.
    """
    stemmer = PorterStemmer()
    words = re.sub("[^a-zA-Z]", " ", text_input).lower().split()
    review = " ".join(
        stemmer.stem(word) for word in words if word not in STOPWORDS
    )

    # transform() takes a collection of documents, hence the one-item list.
    X_prediction = cv.transform([review]).toarray()
    X_prediction_scl = scaler.transform(X_prediction)
    probabilities = predictor.predict_proba(X_prediction_scl)

    # One compact debug line instead of dumping whole feature arrays to
    # stdout on every request.
    app.logger.debug(
        "single_prediction: review=%r probabilities=%s", review, probabilities
    )

    label = probabilities.argmax(axis=1)[0]
    return "Positive" if label == 1 else "Negative"


def bulk_prediction(predictor, scaler, cv, data):

def bulk_prediction(data):
global predictor, scaler, cv # Access the global variables
corpus = []
stemmer = PorterStemmer()
for i in range(0, data.shape[0]):
review = re.sub("[^a-zA-Z]", " ", data.iloc[i]["Sentence"])
review = re.sub("[^a-zA-Z]", " ", data.iloc[i]["verified_reviews"])
review = review.lower().split()
review = [stemmer.stem(word) for word in review if not word in STOPWORDS]
review = " ".join(review)
Expand All @@ -99,23 +117,23 @@ def bulk_prediction(predictor, scaler, cv, data):
y_predictions = list(map(sentiment_mapping, y_predictions))

data["Predicted sentiment"] = y_predictions
predictions_csv = BytesIO()
# predictions_csv = BytesIO() # No longer needed here

data.to_csv(predictions_csv, index=False)
predictions_csv.seek(0)
# data.to_csv(predictions_csv, index=False) # No longer needed
# predictions_csv.seek(0)

graph = get_distribution_graph(data)

return predictions_csv, graph
return data, graph # Return data directly


def get_distribution_graph(data):
fig = plt.figure(figsize=(5, 5))
# Create Matplotlib plot
fig = plt.figure(figsize=(7, 7)) # set fig size
colors = ("green", "red")
wp = {"linewidth": 1, "edgecolor": "black"}
tags = data["Predicted sentiment"].value_counts()
explode = (0.01, 0.01)

explode = (0.1, 0.1)
tags.plot(
kind="pie",
autopct="%1.1f%%",
Expand All @@ -129,11 +147,15 @@ def get_distribution_graph(data):
ylabel="",
)

graph = BytesIO()
plt.savefig(graph, format="png")
plt.close()

return graph
# Convert plot to bytes
img_buffer = BytesIO()
plt.savefig(img_buffer, format='png')
plt.close(fig) # Close the figure to free memory
img_buffer.seek(0) # Rewind the buffer to the beginning

return img_buffer



def sentiment_mapping(x):
Expand All @@ -144,4 +166,4 @@ def sentiment_mapping(x):


if __name__ == "__main__":
app.run(port=5000, debug=True)
app.run(port=5000, debug=True)
111 changes: 88 additions & 23 deletions templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,105 @@
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Text Sentiment Predictor</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css" />
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css">
<style>
@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;500;600;700&display=swap');

body {
font-family: 'Poppins', sans-serif;
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
}

.glass-card {
background: rgba(255, 255, 255, 0.1);
backdrop-filter: blur(12px);
-webkit-backdrop-filter: blur(12px);
border-radius: 20px;
border: 1px solid rgba(255, 255, 255, 0.2);
box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37);
}

.input-glow:focus {
box-shadow: 0 0 15px rgba(79, 70, 229, 0.5);
}

.neon-text {
text-shadow: 0 0 10px #4f46e5, 0 0 20px #4f46e5, 0 0 30px #4f46e5;
}
</style>
</head>

<body>
<h1>Text Sentiment Prediction</h1>
<body class="text-gray-100">
<div class="min-h-screen flex items-center justify-center p-6">
<div class="glass-card p-8 w-full max-w-3xl">
<h1
class="text-4xl font-bold text-center mb-8 bg-gradient-to-r from-blue-400 to-purple-500 bg-clip-text text-transparent">
Text Sentiment Prediction
</h1>

<form id="predictionForm">
<input type="file" id="csvFileInput" accept=".csv">
<textarea id="textInput" placeholder="Enter text..."></textarea>
<button type="button" onclick="predict()">Predict</button>
<button id="downloadBtn" style="display:none" onclick="downloadPredictions()">Download Predictions</button>
</form>
<form id="predictionForm" class="space-y-6">
<div class="group">
<label class="block text-lg font-medium mb-2">Upload CSV File</label>
<div class="relative cursor-pointer">
<input type="file" id="csvFileInput" accept=".csv"
class="opacity-0 absolute inset-0 w-full h-full cursor-pointer">
<div
class="p-4 border-2 border-dashed border-gray-500 rounded-xl group-hover:border-purple-400 transition-colors">
<div class="text-center space-y-2">
<i class="fas fa-cloud-upload-alt text-3xl text-purple-400"></i>
<p class="text-gray-300">Drag & drop or click to upload</p>
<p class="text-sm text-gray-400">Supports CSV files only</p>
</div>
</div>
</div>
</div>

<div id="predictionResult"></div>
<div id="graphContainer"></div>
<div class="relative">
<label class="block text-lg font-medium mb-2">Or Enter Text Directly</label>
<textarea id="textInput" rows="4"
class="w-full bg-gray-800/50 rounded-lg p-4 border-2 border-gray-600 focus:border-purple-400 focus:ring-2 focus:ring-purple-500 input-glow transition-all text-black"
placeholder="Type your text here..."></textarea>
</div>

<button type="button" onclick="predict()"
class="w-full py-4 bg-purple-600 hover:bg-purple-700 text-xl font-bold rounded-lg transition-all transform hover:scale-105 active:scale-95">
🔮 Predict Sentiment
</button>
</form>

<div class="mt-8">
<h3 class="text-2xl font-bold mb-4 text-purple-400">Prediction Result</h3>
<div id="predictionResult"
class="text-3xl font-bold min-h-32 flex items-center justify-center glass-card p-6">
⏳ Waiting for input...
</div>
</div>

<div class="mt-8">
<h3 class="text-2xl font-bold mb-4 text-blue-400">Graph Result</h3>
<div id="graphContainer"
class="aspect-video bg-gray-800/50 rounded-xl flex items-center justify-center glass-card p-6">
<p class="text-gray-400">Graph will appear here</p>
</div>
</div>

<button id="downloadBtn" style="display:none" onclick="downloadPredictions()"
class="w-full mt-8 py-4 bg-green-600 hover:bg-green-700 text-xl font-bold rounded-lg transition-all transform hover:scale-105">
<i class="fas fa-download mr-3"></i>
Download Predictions
</button>
</div>
</div>

<script>
function predict() {
// Check if CSV file is present
var csvFileInput = document.getElementById("csvFileInput");
var textInput = document.getElementById("textInput");
var predictionResult = document.getElementById("predictionResult");
var graphContainer = document.getElementById("graphContainer");

if (csvFileInput.files.length > 0) {
// Upload CSV file
var formData = new FormData();
formData.append("file", csvFileInput.files[0]);

Expand All @@ -40,22 +114,15 @@ <h1>Text Sentiment Prediction</h1>
})
.then(response => {
if (response.headers.get('X-Graph-Exists') === 'true') {
console.log("Graph")
var graphData = response.headers.get('X-Graph-Data');
displayGraph(graphData);
}

return response.blob();
})
.then(blob => {
console.log("Blob:", blob);

document.getElementById("downloadBtn").style.display = "block";
document.getElementById("downloadBtn").onclick = function () {
console.log("Downloading...");
var url = URL.createObjectURL(blob);
console.log("URL:", url);

var a = document.createElement("a");
a.href = url;
a.download = "Predictions.csv";
Expand All @@ -69,7 +136,6 @@ <h1>Text Sentiment Prediction</h1>
});

} else if (textInput.value.trim() !== "") {
// Predict on single sentence
fetch("http://localhost:5000/predict", {
method: "POST",
headers: {
Expand All @@ -79,24 +145,23 @@ <h1>Text Sentiment Prediction</h1>
})
.then(response => response.json())
.then(data => {
console.log(data)
predictionResult.innerHTML = "Predicted sentiment: " + data.prediction;
});
}
}

// Placeholder handler named by the download button's inline onclick
// attribute. After a successful file upload, predict() assigns its own
// onclick function to that button, which performs the actual download.
function downloadPredictions() {
    console.log("Download prediction");
}

// Show the base64-encoded PNG in graphData inside the #graphContainer
// element, replacing whatever the container held before.
function displayGraph(graphData) {
    // Look the container up here: the graphContainer variable declared in
    // predict() is local to that function and is not in scope in this one.
    var container = document.getElementById("graphContainer");
    if (!container) {
        return;
    }

    var img = document.createElement("img");
    img.src = "data:image/png;base64," + graphData;
    img.alt = "Predicted sentiment distribution";

    container.innerHTML = "";
    container.appendChild(img);
}
</script>

</body>

</html>
Loading