forked from sandeepv59/Float_Internship
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSVM_text_sample.R
More file actions
52 lines (29 loc) · 1.67 KB
/
SVM_text_sample.R
File metadata and controls
52 lines (29 loc) · 1.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
library(RODBC)
library(tm)
library(RTextTools)
# Train a linear SVM on the `name` text of already-categorised transactions
# (label: `cat_1`) and use it to classify one user's uncategorised
# transactions. Side effects: queries the `trans` table through the
# "PostgreSQL30" ODBC data source and writes `payrolltrans.csv` to the
# current working directory.

# ---- Load data ----

# Named `conn` rather than `c` so that base::c() is not shadowed.
conn <- odbcConnect("PostgreSQL30")
# odbcConnect() returns -1 (with a warning) instead of erroring on failure.
if (!inherits(conn, "RODBC")) {
  stop("Could not open ODBC data source 'PostgreSQL30'", call. = FALSE)
}

# Both result sets are fetched up front so the connection can be released
# before the (potentially slow) model training starts.
transactions1 <- sqlQuery(
  conn,
  "SELECT * FROM trans where cat_1 <> '' ORDER BY account_id, date desc"
)
transactions_for <- sqlQuery(
  conn,
  "SELECT * FROM trans where user_id = '6f660973-9245-4c24-af48-b9839bd262f6' AND cat_1 = '' ORDER BY account_id, date desc"
)
odbcClose(conn)

# sqlQuery() reports failures by returning the error messages instead of a
# data frame, so check the result type explicitly.
if (!is.data.frame(transactions1)) {
  stop(
    "Training query failed: ", paste(transactions1, collapse = "; "),
    call. = FALSE
  )
}
if (!is.data.frame(transactions_for)) {
  stop(
    "Prediction query failed: ", paste(transactions_for, collapse = "; "),
    call. = FALSE
  )
}
if (nrow(transactions1) == 0L) {
  stop("No categorised transactions available for training", call. = FALSE)
}

# ---- Export payroll transaction names ----

# `%in%` never yields NA, so rows with a missing cat_2 are skipped instead of
# being written out as NA names.
payroll_names <- transactions1$name[transactions1$cat_2 %in% "Payroll"]
write.csv(payroll_names, "payrolltrans.csv")

# ---- Build the training document-term matrix ----

dtMatrix <- create_matrix(
  transactions1["name"],
  language = "english",
  removePunctuation = TRUE,
  stripWhitespace = TRUE,
  toLower = TRUE,
  removeNumbers = TRUE,
  stemWords = TRUE,
  removeStopwords = TRUE,
  removeSparseTerms = 0.7
)

# Show detailed information about the document-term matrix.
inspect(dtMatrix)

# List the terms that occur at least 100 times.
findFreqTerms(dtMatrix, 100)

# ---- Train ----

# Every categorised row is used for training; nothing is held out for testing.
container <- create_container(
  dtMatrix,
  transactions1$cat_1,
  trainSize = seq_along(transactions1$cat_1),
  testSize = NULL,
  virgin = FALSE
)

model <- train_model(container, "SVM", kernel = "linear", cost = 1)

# ---- Predict ----

predSize <- length(transactions_for$name)
if (predSize == 0L) {
  stop("No uncategorised transactions found to classify", call. = FALSE)
}

# Reuse the training vocabulary so the prediction matrix has matching columns.
# NOTE(review): the original author left the hint below, presumably because
# create_matrix() can fail when `originalMatrix` is supplied in some
# RTextTools versions and needs patching by hand -- confirm before relying
# on it.
# trace("create_matrix", edit = TRUE)
predMatrix <- create_matrix(transactions_for["name"], originalMatrix = dtMatrix)

# NOTE(review): the rows selected above all have cat_1 = '', so the labels
# passed here carry no information; confirm whether `virgin = TRUE` was
# intended.
predictionContainer <- create_container(
  predMatrix,
  transactions_for$cat_1,
  testSize = seq_len(predSize),
  virgin = FALSE
)

results <- classify_model(predictionContainer, model)
results

# Keep only predictions the model assigns a probability of at least 0.5.
results[results$SVM_PROB >= 0.5, ]