-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtreino_do_modelo.py
More file actions
76 lines (57 loc) · 2.16 KB
/
treino_do_modelo.py
File metadata and controls
76 lines (57 loc) · 2.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import json
import numpy as np
import tensorflow as tf
import pickle
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
import colorama
# Load the intent definitions. The encoding is passed explicitly: JSON files
# are UTF-8, whereas open() would otherwise fall back to the platform's locale
# encoding and could mis-decode accented characters in the patterns.
with open('intents.json', encoding='utf-8') as file:
    data = json.load(file)

# Parallel lists: training_sentences[i] is an example utterance and
# training_labels[i] is the tag of the intent that utterance belongs to.
training_sentences = []
training_labels = []
# One entry per intent, in file order: that intent's 'responses' value.
responses = []

for intent in data['intents']:
    for pattern in intent['patterns']:
        training_sentences.append(pattern)
        training_labels.append(intent['tag'])
    responses.append(intent['responses'])

# Distinct tags in first-seen order (dict keys keep insertion order).
labels = list(dict.fromkeys(intent['tag'] for intent in data['intents']))

# Width of the classifier's output layer.
num_classes = len(labels)
# Turn the string tags into integer class ids. fit_transform learns the set
# of distinct tags and encodes the list in one call. The fitted encoder stays
# in `lbl_encoder`, which this script pickles after training.
lbl_encoder = LabelEncoder()
training_labels = lbl_encoder.fit_transform(training_labels)
# Hyperparameters shared by the tokenizer and the model.
max_len = 20          # every sequence is padded / truncated to this length
embedding_dim = 16    # size of each learned word vector
vocab_size = 1000     # passed to the tokenizer as num_words and to Embedding
oov_token = "<OOV>"   # stand-in token for words outside the vocabulary

# Fit the tokenizer on the example utterances and keep its word -> index map.
tokenizer = Tokenizer(oov_token=oov_token, num_words=vocab_size)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Encode each utterance as a list of word indices, then bring all of them to
# length max_len; sequences that are too long lose tokens from the end.
sequences = tokenizer.texts_to_sequences(training_sentences)
padded_sequences = pad_sequences(sequences, maxlen=max_len, truncating='post')
# Intent classifier: embed each token, average the embeddings over the
# sequence, pass the result through two small ReLU layers, and finish with a
# softmax over the intent classes.
# NOTE(review): Keras 3 reportedly flags `input_length` as deprecated -
# confirm against the installed Keras version before changing it.
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_len),
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    Dense(16, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

# The sparse loss takes integer class ids, which is what the label encoder
# produces, so no one-hot encoding step is needed.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()
# Train on every example (no validation data is passed to fit). `history`
# keeps the object returned by fit().
epochs = 500
history = model.fit(
    x=padded_sequences,
    y=np.array(training_labels),
    epochs=epochs,
)
# Persist everything needed to reuse the classifier: the trained network plus
# the fitted tokenizer and label encoder that produced its inputs and labels.
# (`pickle` is already imported at the top of the file, so it is not
# re-imported here.)

# Save the trained model.
model.save("chat_model.keras")

# Save the fitted tokenizer.
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Save the fitted label encoder.
# NOTE: pickle files execute code on load; only unpickle these files from a
# trusted location.
with open('label_encoder.pickle', 'wb') as ecn_file:
    pickle.dump(lbl_encoder, ecn_file, protocol=pickle.HIGHEST_PROTOCOL)