# convert_raw_data.py (forked from open-mmlab/OpenPCDet)
import os, shutil, json, yaml
from tqdm import tqdm
import numpy as np
import pandas as pd
from plyfile import PlyData
import argparse
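# This script converts labelCloud annotations (.json) and their .ply point clouds into the
# OpenPCDet custom dataset layout (points/*.npy, labels/*.txt, ImageSets/*.txt), then rewrites
# POINT_CLOUD_RANGE and VOXEL_SIZE in the config files so that OpenPCDet's point cloud range
# conditions are satisfied.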
# 0. convert intensity in pcd to rgb, then convert back to intensity
###########################################################
# some useful functions
###########################################################
# convert intensity to RGB
def getRGBfromI(RGBint):
    blue = RGBint & 255
    green = (RGBint >> 8) & 255
    red = (RGBint >> 16) & 255
    return red, green, blue

# convert RGB to intensity
def getIfromRGB(rgb):
    red = rgb[0]
    green = rgb[1]
    blue = rgb[2]
    RGBint = (red << 16) + (green << 8) + blue
    return RGBint
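# NOTE: the two helpers above are exact inverses of each other, e.g. (hypothetical value)
# getIfromRGB(getRGBfromI(123456)) == 123456; they are kept as utilities even though the
# conversion below simply writes a zeroed intensity channel.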
# convert ply to bin/npy
def convert_ply(input_path, output_path):
    # read data
    plydata = PlyData.read(input_path)
    data = plydata.elements[0].data
    # convert to DataFrame
    data_pd = pd.DataFrame(data)
    # replace the rgb channels with a dummy intensity channel
    data_pd['intensity'] = 0
    try:
        data_pd = data_pd.drop('red', axis=1)
        data_pd = data_pd.drop('green', axis=1)
        data_pd = data_pd.drop('blue', axis=1)
    except KeyError:
        # the ply file has no rgb channels
        pass
    # scale the coordinates by the point cloud magnifying factor
    data_pd['x'] = data_pd['x'].map(lambda x: x * pc_mf)
    data_pd['y'] = data_pd['y'].map(lambda x: x * pc_mf)
    data_pd['z'] = data_pd['z'].map(lambda x: x * pc_mf)
    # record point cloud range
    global point_cloud_ranges
    point_cloud_ranges[0].append(data_pd['x'].min())
    point_cloud_ranges[1].append(data_pd['y'].min())
    point_cloud_ranges[2].append(data_pd['z'].min())
    point_cloud_ranges[3].append(data_pd['x'].max())
    point_cloud_ranges[4].append(data_pd['y'].max())
    point_cloud_ranges[5].append(data_pd['z'].max())
    # initialize array to store data
    data_np = np.zeros(data_pd.shape, dtype=float)
    # read names of properties
    property_names = list(data_pd.columns)
    # read data by property
    for i, name in enumerate(property_names):
        data_np[:, i] = data_pd[name]
    # save
    if output_path.endswith('.bin'):
        data_np.astype(float).tofile(output_path)
    elif output_path.endswith('.npy'):
        np.save(output_path, data_np.astype(float), allow_pickle=True)
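# NOTE: for a typical x/y/z(+rgb) .ply input the saved array is N x 4 float64 in the order
# [x, y, z, intensity] (intensity zeroed), which lines up with the point format used in
# OpenPCDet's custom dataset tutorial.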
# convert pcd to bin (unfinished placeholder: it currently only prints the file contents)
def convert_pcd(input_path, output_path):
    with open(input_path, 'r') as file:
        lines = file.readlines()
    for line in lines:
        print(line)
# read cfg file (yaml)
def read_cfg_file(cfg_file):
    with open(cfg_file, "r") as stream:
        try:
            cfg_file = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)
    return cfg_file
# 1. everything starts here
###########################################################
# main conversion script
###########################################################
# get arguments from the user
parser = argparse.ArgumentParser()
parser.add_argument('--name', type=str, required=False, default='custom', help="name of the new dataset directory to create under data/")
parser.add_argument('--dir', type=str, required=True, help="directory containing the labelCloud annotation files")
parser.add_argument('--cfg_file', type=str, default=None, required=True, help='specify the model config used for training')
parser.add_argument('--pc_mf', type=float, default=1, required=True, help='magnifying factor applied to the point cloud coordinates')
parser.add_argument('--dxdy_mf', type=float, default=1, required=True, help='magnifying factor applied to the dx/dy dimensions of the labels')
args = parser.parse_args()
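# Example invocation (paths/values are illustrative only):
#   python convert_raw_data.py --name custom --dir my_labels \
#       --cfg_file tools/cfgs/custom_models/pointpillar.yaml --pc_mf 1 --dxdy_mf 1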
# global variables
point_cloud_ranges = [[], [], [], [], [], []]
pc_mf = args.pc_mf      # point cloud magnifying factor
dxdy_mf = args.dxdy_mf  # magnifying factor for dx/dy in the label dimensions
voxel_size_x, voxel_size_y, voxel_size_z = 0.2, 0.2, 0.15  # default voxel sizes
multiplier_x, multiplier_y, multiplier_z = 16, 16, 16  # condition 2 of OpenPCDet: x/y range must span a multiple of 16 voxels
z_range = 40  # condition 1 of OpenPCDet: z range must span 40 voxels (voxel-based detectors only)
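# With the defaults above, the x/y extent will be padded up to a multiple of 16 * 0.2 m = 3.2 m
# (condition 2), and a voxel-based detector needs a z extent of 40 * 0.15 m = 6.0 m (condition 1).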
# read cfg file
cfg_file = args.cfg_file
cfg = read_cfg_file(cfg_file) # read cfg file
model_name = cfg['MODEL']['NAME']
data_cfg_file = cfg['DATA_CONFIG']['_BASE_CONFIG_']
data_cfg_file = os.path.join('tools', data_cfg_file)
data_cfg = read_cfg_file(data_cfg_file) # read data cfg file
###########################################################
# prepare the directory for the new custom data
###########################################################
# prepare a new directory to store the converted data
new_directory = args.name
new_directory = os.path.join('data', new_directory)
for i in range(1, 100, 1):
    temp = f'{new_directory}_{i}'
    if i == 1:
        temp = new_directory
    if not os.path.isdir(temp):
        break
new_directory = temp
os.mkdir(new_directory)
# prepare the sub-directories for the data
os.mkdir(os.path.join(new_directory, 'ImageSets'))
os.mkdir(os.path.join(new_directory, 'points'))
os.mkdir(os.path.join(new_directory, 'labels'))
# loop until all conditions are met (refer below: get point cloud range)
while True:
    ###########################################################
    # loop through all annotation files from labelCloud
    ###########################################################
    # get all data id names
    ids = []
    ids_with_fail = []
    # read json
    directory = args.dir
    directory = os.path.join(os.getcwd(), directory)
    filenames = os.listdir(directory)
    for filename in tqdm(filenames, desc=f'Raw data + label -> OpenPCDet standardized format (in {new_directory})...'):
        # get the full path of the annotation file
        filename = os.path.join(directory, filename)
        if filename.endswith('_classes.json'):
            continue
        # open file
        with open(filename) as f:
            data = json.load(f)
        ###########################################################
        # prepare point cloud file
        ###########################################################
        # convert the corresponding .ply file to .npy
        ply_file = data['path']
        npy_file = ply_file[:ply_file.find('.ply')] + '.npy'
        npy_file = os.path.split(npy_file)[1]
        npy_file = os.path.join(new_directory, 'points', npy_file)  # store to new directory
        npy_file = npy_file.replace('fail', '')
        convert_ply(ply_file, npy_file)
        '''
        # replace failxxx.json to xxx.json
        filename = filename.replace('fail', '')
        with open(filename, 'w') as f:
            json.dump(data, f, indent=4)
        '''
        ###########################################################
        # prepare label
        ###########################################################
        objects = data['objects']
        annotations = []
        with_fail = False
        for obj in objects:
            x, y, z = obj['centroid']['x'], obj['centroid']['y'], obj['centroid']['z']
            dx, dy, dz = obj['dimensions']['length'], obj['dimensions']['width'], obj['dimensions']['height']
            yaw = obj['rotations']['z']
            category_name = obj['name'].replace('good', 'pass').replace('defect', 'fail')
            if category_name == 'fail':
                with_fail = True
            # scale the box to match the scaled point cloud
            temp = [x * pc_mf,
                    y * pc_mf,
                    z * pc_mf,
                    dx * pc_mf * dxdy_mf,
                    dy * pc_mf * dxdy_mf,
                    dz * pc_mf,
                    yaw, category_name]
            annotation = ''
            for i in temp:
                annotation += str(i)
                annotation += ' '
            annotation = annotation[:-1]
            annotations.append(annotation)
        # annotation filename for this point cloud
        annot_filename = os.path.join(npy_file[:npy_file.find('points')], 'labels', os.path.split(npy_file)[1][:-4] + '.txt')
        annot_filename = annot_filename.replace('fail', '')
        with open(annot_filename, "w") as f:
            f.writelines("%s\n" % annotation for annotation in annotations)  # store to new directory
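        # each label line follows OpenPCDet's custom dataset format:
        # "x y z dx dy dz heading_angle category_name" (values already scaled by pc_mf / dxdy_mf)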
        ###########################################################
        # get id
        ###########################################################
        id_ = os.path.split(npy_file)[1][:-4].replace('fail', '')
        if with_fail:
            ids_with_fail.append(id_)
        else:
            ids.append(id_)
    ###########################################################
    # prepare imagesets
    ###########################################################
    np.random.seed(123)
    np.random.shuffle(ids)
    np.random.seed(123)
    np.random.shuffle(ids_with_fail)
    """
    Method A: separate by percentage
    """
    train_ids = ids[:int(len(ids)*0.8)] + ids_with_fail[:int(len(ids_with_fail)*0.8)]
    val_ids = ids[int(len(ids)*0.8):] + ids_with_fail[int(len(ids_with_fail)*0.8):]
    """
    Method B: separate by augmentation (this overrides Method A above)
    """
    train_ids, val_ids = [], []
    cache = []
    # this part is for ids_with_fail
    for id_ in ids_with_fail:
        if 'augmented' in id_:
            train_ids.append(id_)
        else:
            cache.append(id_)
    train_ids += cache[:int(len(cache)*0.5)] * 10
    val_ids += cache[int(len(cache)*0.5):]
    # this part is to handle ids
    train_ids += ids[:int(len(ids)*0.5)]
    val_ids += ids[int(len(ids)*0.5):]
    # shuffle training data
    np.random.shuffle(train_ids)
    # txt filenames (train-val split)
    train_txt = os.path.join(npy_file[:npy_file.find('points')], 'ImageSets', 'train.txt')
    val_txt = os.path.join(npy_file[:npy_file.find('points')], 'ImageSets', 'val.txt')
    with open(train_txt, "w") as f:
        f.writelines("%s\n" % id_ for id_ in train_ids)
    with open(val_txt, "w") as f:
        f.writelines("%s\n" % id_ for id_ in val_ids)
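    # note on the split above: Method B keeps every 'augmented' fail sample for training,
    # oversamples half of the remaining fail samples 10x, and splits the pass-only samples 50/50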
    ###########################################################
    # get point cloud range
    ###########################################################
    # refer https://github.com/yjwong1999/OpenPCDet/blob/master/docs/CUSTOM_DATASET_TUTORIAL.md
    # point_cloud_ranges = [xmin, ymin, zmin, xmax, ymax, zmax]
    # CONDITION 1: point cloud range along the z-axis / voxel_size must be 40 (only for voxel-based detectors)
    # CONDITION 2: point cloud range along the x & y axes / voxel_size must be a multiple of 16
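    # worked example for condition 2: if the raw x extent were 10.3 m with voxel_size_x = 0.2
    # and multiplier 16, the loop below would pick temp_x = 4 * 16 * 0.2 = 12.8 m, the smallest
    # multiple of 3.2 m that covers 10.3 m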
    # original point_cloud_ranges (before applying the conditions)
    point_cloud_ranges = [np.around(np.min(item), decimals=2) for item in point_cloud_ranges[:3]] + [np.around(np.max(item), decimals=2) for item in point_cloud_ranges[3:]]
    print(point_cloud_ranges)
    ##############
    # condition 2
    ##############
    for i in range(500):
        temp_x = i * multiplier_x * voxel_size_x
        temp_y = i * multiplier_y * voxel_size_y
        #temp_z = i * multiplier_z * voxel_size_z
        sub_condition_x = temp_x > point_cloud_ranges[3] - point_cloud_ranges[0]
        sub_condition_y = temp_y > point_cloud_ranges[4] - point_cloud_ranges[1]
        #sub_condition_z = temp_z > point_cloud_ranges[5] - point_cloud_ranges[2]
        if sub_condition_x and sub_condition_y:  # and sub_condition_z:
            break
    # x axis: keep xmin, extend xmax
    point_cloud_ranges[3] = point_cloud_ranges[0] + temp_x
    # y axis: keep ymin, extend ymax
    point_cloud_ranges[4] = point_cloud_ranges[1] + temp_y
    # z axis: keep zmin and zmax for now (condition 1 below adjusts them if needed)
    # round to 2 decimal places
    point_cloud_ranges = [np.around(item, decimals=2) for item in point_cloud_ranges]
    ##############
    # condition 1
    ##############
    if model_name in ['PVRCNN', 'SECONDNet']:
        z_min = point_cloud_ranges[2]
        z_max = point_cloud_ranges[5]
        if (z_max - z_min) / voxel_size_z <= z_range:
            z_max = z_min + z_range * voxel_size_z
            point_cloud_ranges[2] = z_min
            point_cloud_ranges[5] = z_max
            break
        else:
            print('Restarting, because condition 1 (z-axis point cloud range / voxel size = 40) is not met')
            print(f'Current voxel_size_z, point_cloud_ranges: {voxel_size_z}, {point_cloud_ranges}')
            print(f'Current value: {(z_max - z_min) / voxel_size_z}')
            # adjust the voxel sizes
            voxel_size_z -= 0.01
            if voxel_size_z <= 0.01: voxel_size_z = 0.01
            voxel_size_z = np.around(voxel_size_z, decimals=2)
            voxel_size_y += 0.01
            if voxel_size_y <= 0.01: voxel_size_y = 0.01
            voxel_size_y = np.around(voxel_size_y, decimals=2)
            voxel_size_x += 0.01
            if voxel_size_x <= 0.01: voxel_size_x = 0.01
            voxel_size_x = np.around(voxel_size_x, decimals=2)
            # decrease the magnifying factor
            pc_mf -= 1
            if pc_mf <= 1: pc_mf = 1
            # restart point_cloud_ranges
            point_cloud_ranges = [[], [], [], [], [], []]
            # remove the previous output
            shutil.rmtree(new_directory)
            # recreate the directory
            os.mkdir(new_directory)
            os.mkdir(os.path.join(new_directory, 'ImageSets'))
            os.mkdir(os.path.join(new_directory, 'points'))
            os.mkdir(os.path.join(new_directory, 'labels'))
            continue
    else:
        break
# cast numpy scalars to plain Python floats so the lists render cleanly when written to YAML
point_cloud_ranges = [float(item) for item in point_cloud_ranges]
print(point_cloud_ranges)
# auto replace point cloud ranges in the dataset yaml file
with open(data_cfg_file, 'r+') as file:
    lines = file.readlines()
    for i, line in enumerate(lines):
        if "POINT_CLOUD_RANGE" in line:
            line = line.split("POINT_CLOUD_RANGE")[0]
            line += f"POINT_CLOUD_RANGE: {str(point_cloud_ranges)}\n"
            lines[i] = line
        if "VOXEL_SIZE" in line:
            line = line.split("VOXEL_SIZE")[0]
            line += f"VOXEL_SIZE: {str([voxel_size_x, voxel_size_y, voxel_size_z])}\n"
            lines[i] = line
    file.seek(0)
    file.writelines(lines)
    file.truncate()
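# NOTE: for PointPillar the z voxel size written below is the full z extent of the point cloud
# range, since PointPillars uses a single pillar (one voxel) along the z axis.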
# auto replace for PointPillar
if model_name in ['PointPillar']:
    with open(cfg_file, 'r+') as file:
        lines = file.readlines()
        for i, line in enumerate(lines):
            # modify POINT_CLOUD_RANGE
            if "POINT_CLOUD_RANGE" in line:
                line = line.split("POINT_CLOUD_RANGE")[0]
                line += f"POINT_CLOUD_RANGE: {str(point_cloud_ranges)}\n"
                lines[i] = line
            # modify VOXEL_SIZE
            if "VOXEL_SIZE" in line:
                line = line.split("VOXEL_SIZE")[0]
                line += f"VOXEL_SIZE: {str([voxel_size_x, voxel_size_y, float(np.around(point_cloud_ranges[5] - point_cloud_ranges[2], decimals=2))])}\n"
                lines[i] = line
        file.seek(0)
        file.writelines(lines)
        file.truncate()
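# After this script finishes, the dataset yaml should contain lines such as (illustrative values only):
#   POINT_CLOUD_RANGE: [0.0, -3.2, -1.0, 6.4, 3.2, 5.0]
#   VOXEL_SIZE: [0.2, 0.2, 0.15]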