-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy path__init__.py
More file actions
89 lines (78 loc) · 3.28 KB
/
__init__.py
File metadata and controls
89 lines (78 loc) · 3.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import subprocess
import shlex
import os.path
import sys
import pandas as pd
from os import getcwd
class PySentiStr:
    """Run the SentiStrength Java jar as a subprocess and parse its scores.

    Typical use: call ``setSentiStrengthPath`` with the jar location and
    ``setSentiStrengthLanguageFolderPath`` with the language data folder,
    then call ``getSentiment`` with the texts to score.
    """

    # Values accepted for the ``score`` argument of getSentiment().
    _SCORE_MODES = ("scale", "binary", "trinary", "dual")

    def __init__(self):
        pass

    def setSentiStrengthPath(self, ss_Path):
        """Remember the path of the jar that is handed to ``java -jar``."""
        self.SentiStrengthLocation = ss_Path

    def setSentiStrengthLanguageFolderPath(self, sslf_Path):
        """Remember the language data folder, normalised to end with "/".

        Raises:
            ValueError: if ``sslf_Path`` is empty.
        """
        if not sslf_Path:
            raise ValueError("The SentiStrength language folder path must not be empty.")
        # Ensure it has a forward slash at the end
        if not sslf_Path.endswith("/"):
            sslf_Path += "/"
        self.SentiStrengthLanguageFolder = sslf_Path

    def getSentiment(self, df_text, score="scale", keywords=None):
        """Score one or more texts with SentiStrength.

        Args:
            df_text: a ``pandas.Series`` of strings, or anything
                ``pandas.Series(...)`` accepts (a single string, a list, ...).
            score: how each (positive, negative, trinary) result is reported:
                ``"scale"``   - positive + negative (-4 to 4),
                ``"binary"``  - 1 if positive >= abs(negative), else -1,
                ``"trinary"`` - tuple of all three values,
                ``"dual"``    - tuple (positive, negative).
            keywords: optional keywords, one per text; they are joined with
                "," and passed to SentiStrength after the ``keywords`` option.

        Returns:
            A list with one entry per parsed result, shaped according to
            ``score``. An empty input yields an empty list.

        Raises:
            AssertionError: if a path has not been set, or if ``keywords`` is
                given but its length differs from the number of texts.
            ValueError: if ``score`` is not one of the supported modes.
            RuntimeError: if the SentiStrength output is empty or cannot be
                parsed as numbers.
        """
        # Explicit raises instead of ``assert`` so the checks survive ``python -O``.
        if not hasattr(self, "SentiStrengthLocation"):
            raise AssertionError("Set path using setSentiStrengthPath(path) function.")
        if not hasattr(self, "SentiStrengthLanguageFolder"):
            raise AssertionError(
                "Set path using setSentiStrengthLanguageFolderPath(path) function."
            )
        # Reject a bad mode before paying for a Java process.
        if score not in self._SCORE_MODES:
            raise ValueError(
                "Argument 'score' must be one of 'scale', 'binary', "
                "'trinary' or 'dual'; got {!r}.".format(score)
            )

        if not isinstance(df_text, pd.Series):
            df_text = pd.Series(df_text)
        if keywords is not None:
            if not isinstance(keywords, pd.Series):
                keywords = pd.Series(keywords)
            if len(df_text) != len(keywords):
                raise AssertionError("You have to pass keywords to all or none texts.")
        if len(df_text) == 0:
            return []

        # One text per stdin line, so line breaks inside a text are removed.
        df_text = df_text.str.replace("\n", "", regex=False)
        df_text = df_text.str.replace("\r", "", regex=False)
        conc_text = "\n".join(df_text)

        # Build the argument list directly: no quoting/splitting round trip,
        # so paths or keywords containing quotes cannot corrupt the command.
        command = [
            "java",
            "-jar",
            self.SentiStrengthLocation,
            "stdin",
            "sentidata",
            self.SentiStrengthLanguageFolder,
            "trinary",
        ]
        if keywords is not None:
            command += ["keywords", ",".join(keywords)]

        # Spaces are sent as "+" on stdin.
        payload = conc_text.replace(" ", "+").encode("utf-8")
        completed = subprocess.run(
            command,
            input=payload,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout_text = completed.stdout.decode("utf-8")

        # Split on any whitespace (tabs, spaces, "\n", "\r\n") so values on
        # adjacent lines can never be glued together.
        tokens = stdout_text.split()
        if not tokens:
            stderr_text = completed.stderr.decode("utf-8", errors="replace").strip()
            raise RuntimeError(stderr_text or "SentiStrength produced no output.")
        try:
            values = [int(float(token)) for token in tokens]
        except (ValueError, OverflowError) as err:
            # Non-numeric output is surfaced as the error message.
            raise RuntimeError(stdout_text.strip()) from err

        # Results arrive as consecutive groups of three numbers.
        triples = [values[i : i + 3] for i in range(0, len(values), 3)]
        if score == "scale":  # Returns from -4 to 4
            return [sum(triple[:2]) for triple in triples]
        if score == "binary":  # Return 1 if positive and -1 if negative
            return [1 if triple[0] >= abs(triple[1]) else -1 for triple in triples]
        if score == "trinary":  # Return Positive and Negative Score and Neutral Score
            return [tuple(triple) for triple in triples]
        # score == "dual": Return Positive and Negative Score
        return [tuple(triple[:2]) for triple in triples]