-
Notifications
You must be signed in to change notification settings - Fork 0
/
audio_augmentation.py
executable file
·131 lines (98 loc) · 4.81 KB
/
audio_augmentation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu May 21 14:42:54 2020
@author: sanchit
"""
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import librosa, librosa.display
from random import uniform
# `%matplotlib inline` is IPython magic, not Python syntax, so leaving it bare
# makes this module fail with a SyntaxError when imported or run as a script.
# Ask the running IPython shell (if any) to apply the same magic instead.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    # Not running under IPython/Jupyter: keep matplotlib's default backend.
    pass
class Audio_augmentation:
    """Waveform-level augmentations for one audio signal.

    The instance keeps the original samples and the sampling rate; every
    augmentation method returns new data and leaves ``audio_data`` untouched.

    Parameters
    ----------
    audio_data : array-like
        Audio samples (assumed to be a 1-D numpy array -- TODO confirm with
        callers; the plotting and padding code treats it as such).
    sampling_rate : int
        Sampling rate of ``audio_data`` in Hz.
    n_fft : int, optional
        FFT window size used for the mel spectrogram (default 2048).
    hop_length : int, optional
        Hop between STFT frames; defaults to ``n_fft // 4``.
    """

    def __init__(self, audio_data, sampling_rate, n_fft=2048, hop_length=None):
        self.audio_data = audio_data
        self.sampling_rate = sampling_rate
        self.audio_len = len(audio_data)

        # parameters for computing the short-time FT / mel spectrogram
        self.n_fft = n_fft
        self.hop_length = n_fft // 4 if hop_length is None else hop_length

    @staticmethod
    def _match_length(data, target_len):
        """Truncate or zero-pad ``data`` at the end to ``target_len`` samples."""
        current_len = len(data)
        if current_len > target_len:
            return data[:target_len]
        if current_len < target_len:
            return np.pad(data, (0, target_len - current_len), mode="constant")
        return data

    def _compute_mel_spectrogram(self, data):
        """Return the mel spectrogram of ``data`` converted to dB."""
        # `y` is passed by keyword: it is keyword-only in recent librosa
        # releases, and older releases accept the keyword form as well.
        mel_spectrogram = librosa.feature.melspectrogram(
            y=data,
            sr=self.sampling_rate,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
        )
        return librosa.power_to_db(np.abs(mel_spectrogram))

    def plot_signals(self, augment_data, title=""):
        """Plot the original and augmented waveforms plus the augmented mel spectrogram.

        Parameters
        ----------
        augment_data : array-like
            Augmented signal; must have the same number of samples as the
            original audio.
        title : str, optional
            Title of the augmented-waveform panel.

        Raises
        ------
        ValueError
            If ``augment_data`` does not have ``audio_len`` samples.
        """
        # Validate up front (and not with `assert`, which is stripped under -O)
        # so that no half-drawn figure is left behind on bad input.
        if len(augment_data) != self.audio_len:
            raise ValueError(
                "augment_data has {} samples, expected {}".format(
                    len(augment_data), self.audio_len
                )
            )

        _, (ax1, ax2, ax3) = plt.subplots(3, figsize=(15, 8))
        samples = np.arange(0, self.audio_len)

        ax1.plot(samples, self.audio_data)
        ax1.set_title('original audio waveform')
        ax1.set_xlabel('samples')
        ax1.set_ylabel('amplitude')

        ax2.plot(samples, augment_data)
        ax2.set_title(title)
        ax2.set_xlabel('samples')
        ax2.set_ylabel('amplitude')

        # compute and plot the mel spectrogram of the augmented signal
        log_mel_spect = self._compute_mel_spectrogram(augment_data)
        librosa.display.specshow(
            log_mel_spect,
            sr=self.sampling_rate,
            hop_length=self.hop_length,
            x_axis='time',
            y_axis='mel',
            ax=ax3,
        )
        ax3.set_title("Mel Spectrogram")
        plt.show()

    def play_signal(self, data):
        """Return an IPython audio widget that plays ``data`` at the stored sampling rate."""
        return ipd.Audio(data, rate=self.sampling_rate)

    def speeding_up_down(self):
        """Time-stretch the audio once slower and once faster.

        Returns
        -------
        tuple
            ``(data_down, data_up)``: the slowed-down and the sped-up signal,
            both truncated or zero-padded to ``audio_len`` samples.
        """
        # rate in (0, 1) slows the signal down, i.e. it adds samples
        slow_rate = uniform(0.5, 0.9)
        # `rate` is passed by keyword: keyword-only in recent librosa releases
        data_down = librosa.effects.time_stretch(self.audio_data, rate=slow_rate)

        # rate > 1 speeds the signal up, i.e. it removes samples
        fast_rate = uniform(1.1, 1.5)
        data_up = librosa.effects.time_stretch(self.audio_data, rate=fast_rate)

        # time stretching changes the number of samples; force both results
        # back to the original length so they stay comparable with the input
        data_down = self._match_length(data_down, self.audio_len)
        data_up = self._match_length(data_up, self.audio_len)
        return (data_down, data_up)

    def add_random_noise(self, noise_factor=0.005):
        """Return the audio with white Gaussian noise added.

        Parameters
        ----------
        noise_factor : float, optional
            Scale applied to the unit-variance noise (default 0.005).
        """
        wgn = np.random.randn(self.audio_len)
        return self.audio_data + noise_factor * wgn

    def shift_data(self, factor=0.2):
        """Circularly shift the audio in both directions.

        Parameters
        ----------
        factor : float, optional
            Fraction of the signal length to roll by (default 0.2).

        Returns
        -------
        tuple
            ``(rolled_forward, rolled_backward)``.
        """
        len_roll = int(self.audio_len * factor)
        return np.roll(self.audio_data, len_roll), np.roll(self.audio_data, -len_roll)

    def change_pitch(self):
        """Return the audio pitch-shifted by a random number of steps in [-4, 4)."""
        steps = np.random.uniform(low=-4, high=4)
        # `sr` is passed by keyword: keyword-only in recent librosa releases
        return librosa.effects.pitch_shift(
            self.audio_data, sr=self.sampling_rate, n_steps=steps
        )

    def change_loudness(self, low=-4, high=4):
        """Return the audio multiplied by a random gain drawn from [low, high).

        With the default range the gain can be negative (which also flips the
        sign of every sample) or close to zero (which nearly silences the
        signal); pass a narrower positive range to avoid that.
        """
        factor = np.random.uniform(low=low, high=high)
        return self.audio_data * factor
class Mel_spectrogram:
    """Compute the log-scaled mel spectrogram of one audio signal.

    Parameters
    ----------
    audio_data : array-like
        Audio samples handed to ``librosa.feature.melspectrogram``.
    sampling_rate : int
        Sampling rate of ``audio_data`` in Hz.
    n_fft : int, optional
        FFT window size for the short-time FT (default 2048).
    hop_length : int, optional
        Hop between STFT frames; defaults to ``n_fft // 4``.
    """

    def __init__(self, audio_data, sampling_rate, n_fft=2048, hop_length=None):
        self.audio_data = audio_data
        self.sampling_rate = sampling_rate

        # parameters for computing the short-time FT / mel spectrogram
        self.n_fft = n_fft
        self.hop_length = n_fft // 4 if hop_length is None else hop_length

    def compute_mel_spectrogram(self):
        """Return the mel spectrogram of the audio data converted to dB."""
        # `y` is passed by keyword: it is keyword-only in recent librosa
        # releases, and older releases accept the keyword form as well.
        mel_spectrogram = librosa.feature.melspectrogram(
            y=self.audio_data,
            sr=self.sampling_rate,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
        )
        return librosa.power_to_db(np.abs(mel_spectrogram))

    def plot_mel_spectrogram(self):
        """Placeholder for plotting the mel spectrogram; currently does nothing."""
        pass