-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel.py
More file actions
104 lines (87 loc) · 3.68 KB
/
model.py
File metadata and controls
104 lines (87 loc) · 3.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import numpy as np
import pandas as pd
import pickle
from statistics import mode
import nltk
from nltk import word_tokenize
from nltk.stem import LancasterStemmer
nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('punkt')
from nltk.corpus import stopwords
from tensorflow.keras.models import Model
from tensorflow.keras import models
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import plot_model
from tensorflow.keras.layers import Input,LSTM,Embedding,Dense,Concatenate,Attention
from sklearn.model_selection import train_test_split
from bs4 import BeautifulSoup
from utils import clean
from text_summarizer_train import in_tokenizer
# encoder inference
latent_dim=500
#load the model
model = models.load_model("s2s")
#construct encoder model from the output of 6 layer i.e.last LSTM layer
en_outputs,state_h_enc,state_c_enc = model.layers[6].output
en_states=[state_h_enc,state_c_enc]
#add input and state from the layer.
en_model = Model(model.input[0],[en_outputs]+en_states)
# decoder inference
#create Input object for hidden and cell state for decoder
#shape of layer with hidden or latent dimension
dec_state_input_h = Input(shape=(latent_dim,))
dec_state_input_c = Input(shape=(latent_dim,))
dec_hidden_state_input = Input(shape=(max_in_len,latent_dim))
# Get the embeddings and input layer from the model
dec_inputs = model.input[1]
dec_emb_layer = model.layers[5]
dec_lstm = model.layers[7]
dec_embedding= dec_emb_layer(dec_inputs)
#add input and initialize LSTM layer with encoder LSTM states.
dec_outputs2, state_h2, state_c2 = dec_lstm(dec_embedding, initial_state=[dec_state_input_h,dec_state_input_c])
#Attention layer
attention = model.layers[8]
attn_out2 = attention([dec_outputs2,dec_hidden_state_input])
merge2 = Concatenate(axis=-1)([dec_outputs2, attn_out2])
#Dense layer
dec_dense = model.layers[10]
dec_outputs2 = dec_dense(merge2)
# Finally define the Model Class
dec_model = Model(
[dec_inputs] + [dec_hidden_state_input,dec_state_input_h,dec_state_input_c],
[dec_outputs2] + [state_h2, state_c2])
#create a dictionary with a key as index and value as words.
reverse_target_word_index = tr_tokenizer.index_word
reverse_source_word_index = in_tokenizer.index_word
target_word_index = tr_tokenizer.word_index
reverse_target_word_index[0]=' '
def decode_sequence(input_seq):
#get the encoder output and states by passing the input sequence
en_out, en_h, en_c= en_model.predict(input_seq)
#target sequence with inital word as 'sos'
target_seq = np.zeros((1, 1))
target_seq[0, 0] = target_word_index['sos']
#if the iteration reaches the end of text than it will be stop the iteration
stop_condition = False
#append every predicted word in decoded sentence
decoded_sentence = ""
while not stop_condition:
#get predicted output, hidden and cell state.
output_words, dec_h, dec_c= dec_model.predict([target_seq] + [en_out,en_h, en_c])
#get the index and from the dictionary get the word for that index.
word_index = np.argmax(output_words[0, -1, :])
text_word = reverse_target_word_index[word_index]
decoded_sentence += text_word +" "
# Exit condition: either hit max length
# or find a stop word or last word.
if text_word == "eos" or len(decoded_sentence) > max_tr_len:
stop_condition = True
#update target sequence to the current word index.
target_seq = np.zeros((1, 1))
target_seq[0, 0] = word_index
en_h, en_c = dec_h, dec_c
#return the deocded sentence
return decoded_sentence