-
Notifications
You must be signed in to change notification settings - Fork 3
/
preprocess.py
102 lines (85 loc) · 3.47 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from __future__ import print_function
import argparse
import os
from preprocessing import csv_utils
# GCP project / bucket for the Dataflow pipeline (see the commented-out
# crowdai_preprocess below). GS_BUCKET is unused by the active code paths.
PROJECT_ID = 'lepton-maps-207611'
GS_BUCKET = 'gs://lepton'
# def crowdai_preprocess(args):
# parser = argparse.ArgumentParser()
# parser.add_argument('-c', '--cloud', action='store_true')
# pargs, rem_args = parser.parse_known_args(args)
# if pargs.cloud:
# pipeline_args = ('--project {project} '
# '--runner DataFlowRunner '
# '--staging_location {bucket}/staging '
# '--temp_location {bucket}/temp '
# '--working {bucket}/data/mapping_challenge '
# '--setup_file ./setup.py ').format(project=PROJECT_ID, bucket='gs://lepton').split()
# else:
# pipeline_args = ('--project {project} '
# '--runner DirectRunner '
# '--staging_location {bucket}/staging '
# '--temp_location {bucket}/temp '
# '--working {bucket}/data/mapping_challenge ').format(project=PROJECT_ID, bucket='.').split()
# print()
# print("crowdai_preprocess " + ' '.join(pipeline_args))
# crowdai.run(pipeline_args)
def combine_csvs(args):
    """Merge several CSVs into one master CSV under ./data.

    Args (parsed from *args*):
        -f/--files: input CSV paths to merge.
        --main: output filename, written to data/<main>.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--files', nargs='+')
    parser.add_argument('--main', required=True)
    pargs, _ = parser.parse_known_args(args)
    wanted_columns = ("image_id", "file_path_image", "file_path_mask")
    combined = csv_utils.combine_csvs(wanted_columns, pargs.files)
    target = os.path.join('data', pargs.main)
    csv_utils.save_to_csv(target, combined, wanted_columns, index=False)
    print()
    print("combine csvs")
def data_split(args):
    """Split input CSVs into train/val CSVs under data/<folder>.

    Args (parsed from *args*):
        -f/--files: input CSV paths to split.
        --folder: output directory name, created under ./data if missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--files', nargs='+')
    parser.add_argument('--folder', required=True)
    pargs, _ = parser.parse_known_args(args)
    out_dir = os.path.join('data', pargs.folder)
    # exist_ok replaces the previous bare try/except pass, which also
    # silently hid real failures such as permission errors.
    os.makedirs(out_dir, exist_ok=True)
    csv_utils.train_val_split(pargs.files,
                              1082,  # split size passed to train_val_split -- semantics TODO confirm
                              os.path.join(out_dir, 'train.csv'),
                              os.path.join(out_dir, 'val.csv'))
    print()
    print("train-valid split")
def localize(args):
    """Create local versions of the given CSV files via csv_utils.

    Args (parsed from *args*):
        -f/--files: CSV paths to localize.

    Failures are reported per file and do not abort the loop.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--files', nargs='+')
    pargs, _ = parser.parse_known_args(args)
    print()
    print("localizing...")
    for filename in pargs.files:
        try:
            csv_utils.create_local_version(filename)
        # Narrowed from a bare except: no longer traps SystemExit or
        # KeyboardInterrupt, and the failure reason is now reported.
        except Exception as exc:
            print('{} failed: {}'.format(filename, exc))
def datafy(args):
    """Run csv_utils.datafy over every file given via -f/--files."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--files', nargs='+')
    pargs, _ = parser.parse_known_args(args)
    print()
    print("datafying...")
    for path in pargs.files:
        csv_utils.datafy(path)
# Dispatch table: maps the -p/--process CLI name to its handler function.
PROCESSES = {
    # 'crowdai': crowdai_preprocess,
    'split': data_split,
    'localize': localize,
    'combine': combine_csvs,
    'datafy': datafy
}
if __name__ == '__main__':
    main_parser = argparse.ArgumentParser()
    # choices= produces a clear usage error (listing valid names) instead
    # of the raw KeyError traceback an unknown process name used to raise.
    main_parser.add_argument('-p', '--process', required=True,
                             choices=sorted(PROCESSES),
                             help="Name of process to run")
    # parse_known_args: remaining argv is forwarded to the chosen handler.
    main_args, process_args = main_parser.parse_known_args()
    print("Running preprocess for {}".format(main_args.process.upper()))
    PROCESSES[main_args.process](process_args)