-
Notifications
You must be signed in to change notification settings - Fork 0
/
train_MnasNet.py
165 lines (142 loc) · 6.24 KB
/
train_MnasNet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import os
import re
import numpy as np
import glob
import tensorflow as tf
from keras import backend as K
from architectures.MnasNet import MnasNet
from keras.callbacks import ModelCheckpoint, CSVLogger
from keras.layers import Dense, Dropout, GlobalAveragePooling2D
from keras.models import Model
from keras.optimizers import Adam
from keras.utils.np_utils import to_categorical
from keras.losses import CategoricalCrossentropy
from sklearn.utils import class_weight
from sequence_loader import SequenceLoader
from keras import backend as K
import cv2 as cv
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # suppress TF info/warning console spam
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'  # allocate GPU memory on demand rather than all up front
#os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'
#py -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))"
# Output directory for checkpoints, saved models, history and stats.
MODEL_PATH = "./nets/MnasNet/"
# Dataset locations: images are individual .jpg files, labels a single .npy array.
TRAIN_IMAGES_PATH = "/sp1/train_set/images/"
TRAIN_LABELS_PATH = "/sp1/train_set/all_labels_exp.npy"
TEST_IMAGES_PATH = "/sp1/val_set/images/"
TEST_LABELS_PATH = "/sp1/val_set/all_labels_exp.npy"
# Global batch size: per-replica batch (8) times the expected replica count (3).
BATCH_SIZE = 8 * 3 # BATCH_SIZE * strategy.num_replicas_in_sync
EPOCHS = 25
IMAGE_SHAPE = (224, 224, 3)  # MnasNet input: 224x224 RGB
AUGMENT = True  # NOTE(review): verify load_dataset actually honors this flag
SHUFFLE = True  # NOTE(review): verify load_dataset actually honors this flag
LEARNING_RATE = 0.0001
ALPHA = 1.0  # MnasNet width multiplier
DEPTH = 1  # MnasNet depth multiplier
# Suffix encoding the augment/shuffle flags into the run name.
ENDING_STRING = ("AUGFULL" if AUGMENT else "") + ("_SHUFFLE" if SHUFFLE else "")
# Run identifier used for checkpoint/weights/stats file names (B = per-replica batch).
MODEL_NAME = f"MnasNet_E{EPOCHS}_B{BATCH_SIZE // 3}_A{ALPHA}_DEPTH{DEPTH}_Adam{LEARNING_RATE}_{ENDING_STRING}"
def init():
    """Enable per-GPU memory growth and return a MirroredStrategy for training.

    Returns
    -------
    tf.distribute.MirroredStrategy
        Strategy using HierarchicalCopyAllReduce (avoids NCCL, which is
        unavailable on some platforms, e.g. Windows).
    """
    physical_gpus = tf.config.list_physical_devices('GPU')
    if physical_gpus:
        try:
            # Memory growth must be configured identically on every GPU,
            # and only before any of them has been initialized.
            for device in physical_gpus:
                tf.config.experimental.set_memory_growth(device, True)
            logical_gpus = tf.config.list_logical_devices('GPU')
            print(len(physical_gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        except RuntimeError as err:
            # Raised when the GPUs were already initialized; report and continue.
            print(err)
    strategy = tf.distribute.MirroredStrategy(cross_device_ops = tf.distribute.HierarchicalCopyAllReduce())
    return strategy
def load_model(strategy, existingModelPath = None):
    """Build (or reload) the MnasNet classifier, compiled for distributed training.

    Parameters
    ----------
    strategy : tf.distribute.Strategy
        Distribution strategy whose scope a freshly built model is created under.
    existingModelPath : str, optional
        When given, the saved model at this path is loaded instead of building
        a new one (a model saved with its optimizer comes back already compiled).

    Returns
    -------
    keras.Model
        Compiled 8-class MnasNet ready for `fit`.
    """
    # Fix: compare against None with `is not`, not `!=` (PEP 8; `!=` can be
    # hijacked by a custom __ne__).
    if existingModelPath is not None:
        # NOTE(review): loading outside strategy.scope() means a reloaded model
        # is not replicated across devices — confirm this is intended.
        model = tf.keras.models.load_model(existingModelPath)
    else:
        with strategy.scope():
            model = MnasNet(input_shape = IMAGE_SHAPE, alpha = ALPHA, depth_multiplier = DEPTH, nb_classes = 8)
            model.compile(loss = CategoricalCrossentropy(), optimizer = Adam(learning_rate = LEARNING_RATE), metrics = ['accuracy'])
    return model
def atoi(text):
    """Return *text* converted to int when it is all digits; otherwise unchanged."""
    if text.isdigit():
        return int(text)
    return text
def natural_keys(text):
    """Sort key for natural ("human") ordering, e.g. img2.jpg before img10.jpg.

    Splits *text* into alternating non-digit/digit chunks and converts the
    digit runs to ints so numeric parts compare numerically.
    """
    return [int(chunk) if chunk.isdigit() else chunk
            for chunk in re.split(r'(\d+)', text)]
def load_dataset(labels_path, images_path):
    """Load labels and image paths and wrap them in a batched SequenceLoader.

    Parameters
    ----------
    labels_path : str
        Path to a .npy array of integer class labels (8 classes).
    images_path : str
        Directory containing the matching .jpg images.

    Returns
    -------
    tuple
        (sequence, sample_count, class_weights) where class_weights is a
        {class_index: weight} dict suitable for model.fit(class_weight=...).
    """
    labels = np.load(labels_path)
    images_paths_list = glob.glob(images_path + "*.jpg")
    # Natural sort keeps "img2.jpg" before "img10.jpg" so images align with labels.
    images_paths_list.sort(key = natural_keys)
    # Balanced class weights counteract the skewed label distribution.
    weights = class_weight.compute_class_weight(class_weight = 'balanced', classes = np.unique(labels), y = labels)
    weights = dict(enumerate(weights))
    labels = to_categorical(labels, num_classes = 8)
    # Fix: honor the module-level AUGMENT/SHUFFLE flags instead of hard-coded
    # True/True locals, so runtime behavior matches what MODEL_NAME advertises.
    sequence = SequenceLoader(images_paths_list, labels, BATCH_SIZE, IMAGE_SHAPE, SHUFFLE, AUGMENT)
    return sequence, len(images_paths_list), weights
if __name__ == "__main__":
strategy = init()
#init()
model = load_model(strategy)
#model = load_model()
print(" ***** MODEL LOADED ***** ")
train_sequence, train_labels_count, train_weights = load_dataset(TRAIN_LABELS_PATH, TRAIN_IMAGES_PATH)
test_sequence, test_labels_count, test_weights = load_dataset(TEST_LABELS_PATH, TEST_IMAGES_PATH)
print(" ***** SEQUENCES READY ***** ")
history = model.fit(
train_sequence,
steps_per_epoch = train_labels_count // BATCH_SIZE,
class_weight = train_weights,
epochs = EPOCHS,
validation_data = test_sequence,
validation_steps = test_labels_count // BATCH_SIZE,
callbacks = [
ModelCheckpoint(MODEL_PATH + MODEL_NAME + '_E_{epoch:02d}_{val_loss:.3f}_T.tf', monitor = 'val_acc',
save_best_only = False,
save_weights_only = False,
save_format = 'tf'),
],
workers = 12,
use_multiprocessing = False) # False
"""
use_multiprocessing:
Boolean. Used for generator or keras.utils.Sequence input only. If True, use process-based threading. If unspecified, use_multiprocessing will default to False. Note that because this implementation relies on multiprocessing, you should not pass non-picklable arguments to the generator as they can't be passed easily to children processes.
"""
print(" ***** MODEL FITTED ***** ")
for layer in model.layers:
if layer is Dropout:
model.layers.remove(layer)
model.save_weights(MODEL_PATH + MODEL_NAME + '_weights', save_format = 'tf', overwrite = True)
model.save(MODEL_PATH + MODEL_NAME, save_format = 'tf', overwrite = True)
print(" ***** ENDING ***** ")
np.save(MODEL_PATH + '_HIST', history.history)
f = open(MODEL_PATH + MODEL_NAME + "/stats.txt", "w")
f.write("accuracy:\n")
f.write(str(history.history['accuracy']))
f.write("\n")
f.write("val_accuracy:\n")
f.write(str(history.history['val_accuracy']))
f.write("\n\n")
model = tf.keras.models.load_model(MODEL_PATH + MODEL_NAME)
labels = np.load(TEST_LABELS_PATH)
predictions = []
images_paths_list = glob.glob(TEST_IMAGES_PATH + "*.jpg")
images_paths_list.sort(key = natural_keys)
errors = 0
for i in range(len(images_paths_list)):
img_path = images_paths_list[i]
img = cv.imread(img_path, 1)
img = img.reshape(1, 224, 224, 3)
prediction = model.predict(img, verbose = 0)[0]
predictions.append(np.argmax(prediction))
if np.argmax(prediction) != labels[i]:
errors += 1
evaluation = (1 - (errors / (i + 1))) * 100
print(f"{i} / {len(images_paths_list)}\t\tSuccess rate: {evaluation:.3f} % ", end = "\r")
print("\n")
evaluation = (1 - (errors / (len(images_paths_list)))) * 100
print(f"{MODEL_NAME}\nImages: {len(images_paths_list)}\nErrors: {errors}\nSuccess rate: {evaluation:.3f} %\nConfusion matrix:\n{tf.math.confusion_matrix(labels, predictions)}")
f.write(f"{MODEL_NAME}\nImages: {len(images_paths_list)}\nErrors: {errors}\nSuccess rate: {evaluation:.3f} %\nConfusion matrix:\n{tf.math.confusion_matrix(labels, predictions)}")
f.close()
print(" ***** STATS SAVED ***** ")