-
Notifications
You must be signed in to change notification settings - Fork 0
/
chatbot.py
126 lines (89 loc) · 3.37 KB
/
chatbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import nltk
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()
import os
import numpy
import tflearn
import tensorflow
import random
import pickle
import json
# Read the intents definition (tags, patterns, responses) from disk.
with open('intents.json') as file:
    data = json.load(file)

# Corpora accumulated while scanning the intents:
#   all_words - every token seen across all patterns
#   labels    - every distinct intent tag
#   docs_x    - tokenized patterns, one entry per pattern
#   docs_y    - the tag for the pattern at the same index in docs_x
all_words, labels, docs_x, docs_y = [], [], [], []
# Walk every intent and each of its example patterns, collecting the
# vocabulary and the parallel (pattern, tag) training pairs.
for intent in data['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        # e.g. "whats on the menu" -> ["whats", "on", "the", "menu"]
        tokens = nltk.word_tokenize(pattern)
        all_words.extend(tokens)
        docs_x.append(tokens)
        docs_y.append(tag)
    if tag not in labels:
        labels.append(tag)
# Debug dump of the collected corpora (development leftover).
print(all_words, '\n\n\n', docs_x, '\n\n\n', docs_y, '\n\n\n', labels)
# Normalize the vocabulary: lowercase + stem each token (told/talking -> talk)
# and drop bare "?" tokens, then dedupe and sort so every bag-of-words
# vector indexes the vocabulary in a stable order.
all_words = sorted(set(stemmer.stem(w.lower()) for w in all_words if w != "?"))
labels = sorted(labels)
training = []
output = []

# ----- One-hot encoding -----
# Each pattern becomes a bag-of-words vector over the full vocabulary;
# each tag becomes a one-hot row over the sorted label list.
out_empty = [0 for _ in range(len(labels))]
for x, doc in enumerate(docs_x):
    # Stem the pattern's tokens into a set: O(1) membership per vocabulary
    # word instead of the original O(len(doc)) list scan for each word.
    wrds = {stemmer.stem(w.lower()) for w in doc}
    bag = [1 if w in wrds else 0 for w in all_words]

    output_row = out_empty[:]  # fresh copy so rows don't share state
    output_row[labels.index(docs_y[x])] = 1

    training.append(bag)
    output.append(output_row)
# ----------------------------

training = numpy.array(training)  # one-hot encoded patterns -> numpy array
output = numpy.array(output)      # one-hot encoded tags -> numpy array
# --- Network definition (tflearn on the TF1-era graph API) ---
# NOTE(review): tensorflow.reset_default_graph() was removed in TF 2.x;
# this script appears to assume a TensorFlow 1.x install — confirm.
tensorflow.reset_default_graph()
# Input layer: one bag-of-words vector per example.
net = tflearn.input_data(shape=[None, len(training[0])])
net = tflearn.fully_connected(net, 8)  # hidden layer, 8 units
net = tflearn.fully_connected(net, 8)  # second hidden layer, 8 units
# Output layer: one softmax probability per intent tag.
net = tflearn.fully_connected(net, len(output[0]), activation="softmax")
net = tflearn.regression(net)
model = tflearn.DNN(net)
# Reuse a previously trained checkpoint when present; otherwise train
# from scratch and save so subsequent runs can skip training.
if os.path.exists("./model.tflearn.index"):
    model.load("./model.tflearn")
else:
    model.fit(training, output, n_epoch=1000, batch_size=8, show_metric=True)
    model.save("./model.tflearn")
def bag_of_words(s, words):
    """Encode sentence `s` as a bag-of-words vector over vocabulary `words`.

    Tokenizes and stems the sentence the same way the training data was
    processed, then returns a numpy array with 1 at each vocabulary
    position whose word occurs in the sentence and 0 elsewhere.
    """
    # Stemmed tokens as a set: one pass over the vocabulary with O(1)
    # lookups, replacing the original O(len(s) * len(words)) nested scan.
    s_words = {stemmer.stem(word.lower()) for word in nltk.word_tokenize(s)}
    return numpy.array([1 if w in s_words else 0 for w in words])
def chat():
    """Interactive loop: read user input, predict an intent, print a reply."""
    print("Start talking with the bot (type quit to stop)!")
    while True:
        user_text = input("You: ")
        if user_text.lower() == "quit":
            break
        # Score the input against every tag and keep the most likely one.
        scores = model.predict([bag_of_words(user_text, all_words)])
        predicted_tag = labels[numpy.argmax(scores)]
        # Find the responses configured for the predicted tag.
        for entry in data["intents"]:
            if entry['tag'] == predicted_tag:
                responses = entry['responses']
        print(random.choice(responses))
# Only launch the REPL when executed as a script, not on import.
if __name__ == "__main__":
    chat()