rasptestrealtime.py

import pyaudio
# import sys
import time
import os
import subprocess
import numpy as np
import matplotlib.pyplot as plt
from multiprocessing import Queue
from tensorflow.keras.models import load_model


def graph_spectogram(data):
    '''Draws a spectrogram of the audio samples passed as its argument.

    Arguments:
    data -- 1-D (mono) or 2-D (stereo) numpy array of audio samples

    Returns:
    pxx -- the spectrogram as a 2-D array (frequency bins x time windows)'''
    nfft = 200  # length of each window segment
    fs = 44100  # sampling frequency
    noverlap = 120  # overlap between consecutive windows
    nchannels = data.ndim
    if nchannels == 1:
        pxx, freqs, bins, im = plt.specgram(data, nfft, fs, noverlap=noverlap)
    elif nchannels == 2:
        # stereo input: use the first channel only
        pxx, freqs, bins, im = plt.specgram(data[:, 0], nfft,
                                            fs, noverlap=noverlap)
    else:
        raise ValueError('expected mono or stereo audio data')
    return pxx
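
# With nfft=200 and noverlap=120, each window advances 80 samples, so a
# 10 s mono clip at 44.1 kHz (441000 samples) yields
# (441000 - 200) // 80 + 1 = 5511 time windows over nfft // 2 + 1 = 101
# frequency bins, i.e. pxx has shape (101, 5511).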


def detect_triggerword(audio, model):
    x = graph_spectogram(audio)
    x = x.swapaxes(0, 1)  # (freq, time) -> (time, freq)
    x = np.expand_dims(x, axis=0)  # add a batch dimension
    predictions = model.predict(x)
    return predictions
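
# The swapaxes/expand_dims above produce a (1, 5511, 101) batch in
# (batch, time, freq) order for a 10 s feed. That this is exactly the
# input shape best_trigger_model_TM_30.h5 expects is an assumption
# inferred from the parameters in this script, not verified here.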


def triggerword_heard(predictions, chunk_duration,
                      feed_duration, threshold=0.45):
    """
    Detects a new trigger word in the latest chunk of input audio by
    looking for a rising edge of the prediction signal within the
    last/latest chunk.

    Arguments:
    predictions -- predicted labels from the model
    chunk_duration -- duration in seconds of one chunk
    feed_duration -- duration in seconds of the input fed to the model
    threshold -- probability above which a prediction is considered
                 positive

    Returns:
    True if a new trigger word is detected in the latest chunk
    """
    predictions = predictions.reshape(-1)
    # change predictions to a 0/1 array
    predictions = predictions > threshold
    chunk_pred_samples = int(len(predictions) *
                             chunk_duration / feed_duration)
    chunk_pred = predictions[-chunk_pred_samples:]
    level = chunk_pred[0]
    for pred in chunk_pred:
        if pred > level:
            return True
        else:
            level = pred
    return False
# Simpler variant, kept for reference: fire on any positive prediction
# anywhere in the chunk instead of requiring a rising edge.
# for pred in chunk_pred:
#     if pred > threshold:
#         return True
# return False
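
# A minimal sketch of the rising-edge behaviour on toy values (the
# numbers below are hypothetical, not real model output):
#   triggerword_heard(np.array([0.1, 0.2, 0.3, 0.9]), 1.0, 2.0) -> True
#     (last chunk thresholds to [False, True]: a rising edge)
#   triggerword_heard(np.array([0.9, 0.9, 0.9, 0.9]), 1.0, 2.0) -> False
#     (already high when the chunk starts, so no *new* trigger)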


def get_audio_stream(callback):
    chunk_duration = 0.5  # step size (seconds) of the sliding 10 s window
    form = pyaudio.paInt16
    channels = 1
    rate = 44100
    chunk_samples = int(rate * chunk_duration)
    p = pyaudio.PyAudio()
    stream = p.open(format=form, channels=channels, rate=rate, input=True,
                    frames_per_buffer=chunk_samples, stream_callback=callback)
    return stream
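
# With frames_per_buffer = chunk_samples, PyAudio invokes the callback
# roughly every 0.5 s with 22050 fresh int16 samples.
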
python_path = "/usr/bin/python3"
commandfile_path = str(os.getcwd()) + '/commandspred.py'

t1 = time.time()
model = load_model('triggerword/best_trigger_model_TM_30.h5')

chunk_duration = 0.5  # step size (seconds) of the sliding 10 s window
rate = 44100
chunk_samples = int(rate * chunk_duration)
feed_duration = 10  # length (seconds) of the window fed to the model
feed_samples = int(rate * feed_duration)

q = Queue()
run = True
silence_threshold = 100
timeout = time.time() + 30  # duration of the demo in seconds
data = np.zeros(feed_samples, dtype='int16')
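
# `data` acts as a sliding buffer over the most recent feed_samples
# (10 s) of audio: the callback below appends each new chunk and trims
# the front so the model always sees the latest 10 s of input.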


def callback(in_data, frame_count, time_info, status):
    # changing these variables here changes them for the whole script
    global run, timeout, data, silence_threshold
    if time.time() > timeout:
        run = False
        return (in_data, pyaudio.paComplete)
    data0 = np.frombuffer(in_data, dtype='int16')
    if np.abs(data0).mean() < silence_threshold:
        print('-')
        # skip chunks whose mean amplitude is below the silence threshold
        return (in_data, pyaudio.paContinue)
    else:
        print('.')
        data = np.append(data, data0)
        if len(data) > feed_samples:
            data = data[-feed_samples:]
        # hand the data to the main loop asynchronously via a queue
        q.put(data)
    return (in_data, pyaudio.paContinue)
print("getting the audio stream for 30 secs")
stream = get_audio_stream(callback)
stream.start_stream()
try:
while run:
if not q.empty():
data = q.get()
pred = detect_triggerword(data, model)
new_trigger = triggerword_heard(pred, chunk_duration,
feed_duration)
if new_trigger:
# print(1)
subprocess.run([python_path, commandfile_path])
except (KeyboardInterrupt, SystemExit):
stream.stop_stream()
stream.close()
timeout = time.time()
run = False
exit()
stream.stop_stream()
stream.close()
t2 = time.time()
print(f'time taken = {t2-t1} seconds')
exit()