# app.py
import os
import pickle
import joblib
import pandas as pd
import nltk
from flask import Flask, render_template, request, jsonify
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize
from collections import Counter

nltk.download('punkt')
nltk.download('punkt_tab')  # needed by word_tokenize on newer NLTK releases
nltk.download('wordnet')
nltk.download('stopwords')

app = Flask(__name__)
# Load the training data; the commented lines below show how a pre-trained
# model could be loaded from disk instead of retraining on every start
df = pd.read_csv('trimmed_data.csv')
#rf_pipeline_joblib = joblib.load('modeljob.pkl')
#rf_pipeline_joblib = pickle.load(open("modeljob.pkl", "rb"))
# Convert text to lowercase
df['clean_text'] = df['clean_text'].str.lower()
# Remove numbers, special characters, and punctuation
df['clean_text'] = df['clean_text'].str.replace('[^a-zA-Z]', ' ', regex=True)
# Tokenize the text into words
df['tokens'] = df['clean_text'].apply(word_tokenize)
stemmer = PorterStemmer()
# Apply stemming to each token
df['stemmed_tokens'] = df['tokens'].apply(lambda x: [stemmer.stem(token) for token in x])
lemmatizer = WordNetLemmatizer()
# Apply lemmatization to each token
df['lemmatized_tokens'] = df['tokens'].apply(lambda x: [lemmatizer.lemmatize(token) for token in x])
# Retrieve the English stopwords
stop_words = set(stopwords.words('english'))
# Remove stopwords from the raw tokens
df['clean_tokens'] = df['tokens'].apply(lambda x: [token for token in x if token not in stop_words])
# Remove stopwords from the lemmatized tokens
df['cleaned_tokens'] = df['lemmatized_tokens'].apply(lambda x: [token for token in x if token not in stop_words])
# Combine all raw tokens into a single string
all_tokens = ' '.join(df['tokens'].sum())
# Combine all stemmed tokens into a single string (overwrites the previous value)
all_tokens = ' '.join(df['stemmed_tokens'].sum())
# Combine all cleaned, lemmatized tokens into a single string (only this last
# value survives, and none of these strings are used further below)
all_tokens = ' '.join(df['cleaned_tokens'].sum())
# Filter rows where "is_depression" is 1
depression_data = df[df['is_depression'] == 1]
# Flatten the lemmatized tokens of the depression posts into a single list
combined_tokens = [token for tokens_list in depression_data['lemmatized_tokens'] for token in tokens_list]
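# A minimal exploratory sketch using the imported Counter to surface the most
# frequent tokens in depression-labelled posts (top_tokens is a hypothetical
# name introduced here; nothing below depends on it)
top_tokens = Counter(combined_tokens).most_common(10)
print(f"Most common tokens in depression posts: {top_tokens}")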
# Join each row's lemmatized tokens back into a sentence for the vectorizer
df['lemmatized_sentences'] = df['lemmatized_tokens'].apply(lambda tokens: ' '.join(tokens))
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
X = df['lemmatized_sentences'] # Input data (lemmatized sentences)
y = df['is_depression'] # Target labels (0 or 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create a pipeline with only Random Forest
rf_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('classifier', RandomForestClassifier())
])
# Train the model
rf_pipeline.fit(X_train, y_train)
# Evaluate on the held-out test set
y_pred = rf_pipeline.predict(X_test)
report = classification_report(y_test, y_pred)
print(f"Classification Report:\n{report}\n")
# IMAGE_FOLDER = os.path.join('static', 'images')
# app.config['UPLOAD_FOLDER'] = IMAGE_FOLDER
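# The pipeline was fitted on lemmatized, stopword-free sentences, so raw form
# input arguably needs the same cleaning before prediction. A minimal helper
# sketch mirroring the training-time steps above (preprocess_input is a
# hypothetical name introduced here, not part of the original app)
def preprocess_input(text):
    import re
    # Lowercase and keep letters only, as done on clean_text above
    text = re.sub('[^a-zA-Z]', ' ', text.lower())
    # Tokenize, lemmatize, and drop stopwords, matching the training columns
    tokens = word_tokenize(text)
    lemmas = [lemmatizer.lemmatize(token) for token in tokens]
    return ' '.join(token for token in lemmas if token not in stop_words)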
@app.route('/')
def index():
    return render_template('index.html')
@app.route('/predict', methods=['POST'])
def predict():
    text_input = request.form['newsInput']
    # Clean the input like the training text, then take the single
    # predicted label (0 or 1) rather than returning the raw array
    prediction = int(rf_pipeline.predict([preprocess_input(text_input)])[0])
    #prediction = rf_pipeline_joblib.predict([preprocess_input(text_input)])
    return render_template('index.html', result=prediction)
@app.route('/get_data')
def get_data():
    # Return the processed dataset as JSON (uses the jsonify import above)
    data_dict = df.to_dict(orient='records')
    return jsonify(data_dict)
# @app.route('/index')
# def show_index():
# full_filename = os.path.join(app.config['UPLOAD_FOLDER'], 'loneliness.png')
# return render_template("index.html", html_src_image = full_filename)
if __name__ == "__main__":
    app.run(debug=True, port=3004)
# http://127.0.0.1:3004
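# Example request against the running app, assuming the 'newsInput' form field
# used by the /predict route above (an illustration, not from the source):
#   curl -X POST -d "newsInput=I feel hopeless and alone" http://127.0.0.1:3004/predict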