-
Notifications
You must be signed in to change notification settings - Fork 0
/
model.py
97 lines (82 loc) · 4 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import numpy as np
import os
from spacy import Language
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D, LSTM, Flatten, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import load_model
from typing import List, Dict, Tuple, Union
from function_utils import function_transform_input_user
from my_config import vocab_size, embedding_dim, max_len
def function_model_training(nb_epochs: int, padded_sequences, training_labels, num_classes: int) -> "Sequential":
    """Load the saved Keras model if present, otherwise build one; then train and save it.

    Args:
        nb_epochs: number of training epochs to run.
        padded_sequences: tokenized and padded input sequences (array-like).
        training_labels: integer-encoded class labels (array-like) —
            matches the sparse_categorical_crossentropy loss used below.
        num_classes: width of the softmax output layer; only used when a
            fresh model is built.

    Returns:
        The trained Sequential model.
    """
    if os.path.isfile('model/model.h5'):
        print("Model already exists. Load it..\n")
        model = load_model('model/model.h5')
    else:
        print("Model does not exist. Create it..\n")
        model = Sequential()
        model.add(Embedding(vocab_size, embedding_dim, input_length=max_len))
        model.add(LSTM(150, return_sequences=True))
        model.add(LSTM(150))
        model.add(Flatten())
        model.add(Dense(units=64, activation='relu'))
        model.add(Dropout(0.3))
        model.add(Dense(num_classes, activation='softmax'))
        # Integer labels -> sparse (not one-hot) categorical crossentropy.
        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer='adam', metrics=['accuracy'])
    print(model.summary())
    # NOTE(review): original indentation was lost in extraction; fit/save are
    # placed at function level so a loaded model is also fine-tuned — confirm
    # against upstream if a loaded model should instead be returned untrained.
    model.fit(x=padded_sequences, y=np.array(training_labels), epochs=nb_epochs)
    # Bug fix: model.save raises FileNotFoundError on a fresh checkout where
    # the 'model' directory does not exist yet — create it first.
    os.makedirs('model', exist_ok=True)
    model.save('model/model.h5')
    return model
def function_find_max_proba_treshold(model: Sequential, tokenizer: Tokenizer, stopWords: set, nlp: Language) -> float:
    """Estimate a baseline confidence threshold by predicting on gibberish.

    Runs the model on a nonsense token; the highest class probability the
    model assigns to garbage input serves as a floor for judging whether a
    real prediction is trustworthy.
    """
    gibberish = function_transform_input_user("frezfrezf", tokenizer, stopWords, nlp)
    probabilities = model.predict(gibberish)[0]
    return max(probabilities)
def function_return_predict_model(input_sentence: str, model: "Sequential", tokenizer: "Tokenizer", stopWords: set,
                                  nlp: "Language", max_proba_treshold: float) -> Tuple[List[float], bool]:
    """Predict the intent probabilities for a user sentence and judge confidence.

    Args:
        input_sentence: raw user input.
        model: trained Keras classifier.
        tokenizer / stopWords / nlp: preprocessing objects forwarded to
            function_transform_input_user.
        max_proba_treshold: gibberish baseline from
            function_find_max_proba_treshold.

    Returns:
        (result, answer_valid): the raw prediction (batch of one) and True
        when some class probability exceeds the baseline by more than 0.4.
    """
    result = model.predict(function_transform_input_user(input_sentence, tokenizer, stopWords, nlp))
    # Bug fix: the original for/if/else left `answer_valid` unbound when
    # result[0] was empty; any() covers that case and states the intent.
    answer_valid = any(proba > max_proba_treshold + 0.4 for proba in result[0])
    return result, answer_valid
def function_return_type_answer_model(answer_valid: bool, result: List[float], lbl_encoder: "LabelEncoder",
                                      data: Dict[str, List[Dict[str, Union[str, List[str]]]]]) -> Tuple[str, Union[None, str, List[str]], str]:
    """Map a model prediction to the chatbot's reply (text plus optional attachment).

    Args:
        answer_valid: whether the prediction passed the confidence check.
        result: class probabilities from the model.
        lbl_encoder: fitted encoder whose inverse_transform maps a class
            index back to an intent tag.
        data: intents config; each intent has 'tag', 'type', 'responses'
            and, for file/photo types, 'link'.

    Returns:
        (answer_text, answer_file_link, answer_file_type); the last two are
        None for plain-text answers.
    """
    # Fallback defaults — also a bug fix: the original left all three names
    # unbound (UnboundLocalError) when answer_valid was True but no intent
    # tag matched, or the intent 'type' was unrecognized.
    # (Also fixes the user-facing typo "undertand" -> "understand".)
    answer_text = np.random.choice(
        ["I'm sorry I do not understand", "What did you say ?",
         "Can you rephrase your sentence differently?"])
    answer_file_link = None
    answer_file_type = None
    if answer_valid:
        # Bug fix: inverse_transform returns an array; take the scalar so the
        # comparison below is plain string equality instead of a 1-element
        # numpy bool array.
        tag = lbl_encoder.inverse_transform([np.argmax(result)])[0]
        for intent in data['intents']:
            if intent['tag'] != tag:
                continue
            if intent['type'] == 'text':
                answer_text = np.random.choice(intent['responses'])
                answer_file_link = None
                answer_file_type = None
            elif intent['type'] in ('file', 'photo', 'multiple_photos'):
                answer_text = intent['responses']
                answer_file_link = intent['link']
                # Only 'file' maps to a document; both photo types map to 'photo'.
                answer_file_type = 'document' if intent['type'] == 'file' else 'photo'
    return answer_text, answer_file_link, answer_file_type