-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
200 lines (163 loc) · 8.4 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import copy
import os.path
import numpy as np
import json
import rasterio as rio
import pandas as pd
""" Global Variables """
# Geo-coordinate edges of the image (left, right, top, bottom); all 0 until an image is read
img_left_bounds = img_right_bounds = img_top_bounds = img_bottom_bounds = 0
# Size of the image in pixels (width, height)
img_width_px = img_height_px = 0
# Number of image pixels per metre of geo-coordinates
px_per_m = 0
"""
Main entry point
Handles all methods
Params:
geojson_filepath is the filepath to where the geojson file lives, including the name of the file and the file extension
img_filepath is the filepath to where the relevant image lives, including the name of the image and the file extension
new_csv_name is the filepath to where you want the csv to be saved, including the name of the csv and the file extension
id_name is passed through to making_lists_for_csv, which does not currently use it
label is what you want to label the feature, e.g. 'tree' or 'oak'
"""
def here_we_go(geojson_filepath, img_filepath, new_csv_name, id_name, label):
    """Run the full GeoJSON-to-CSV pipeline for a single image.

    Args:
        geojson_filepath: Path of the GeoJSON annotation file (name and extension included).
        img_filepath: Path of the image (name and extension included); it is also
            the value written to the CSV's image_path column.
        new_csv_name: Path of the CSV file to create or extend.
        id_name: Forwarded unchanged to making_lists_for_csv.
        label: Label written on every row, e.g. 'tree' or 'oak'.
    """
    # get_valid_id returns (ids, loaded annotations), exactly the two arguments get_id_coords takes
    geo_boxes = get_id_coords(*get_valid_id(geojson_filepath, img_filepath))
    csv_rows = making_lists_for_csv(img_filepath, geo_boxes, id_name, label)
    create_csv(csv_rows, new_csv_name)
"""
Gets a list of ids (the 'fcode' property) from the geojson file for the features
whose first vertex lies within the geo-coordinate bounds of the image
"""
def get_valid_id(geo_json_file, img_filepath):
    """Collect the ids of GeoJSON features whose first vertex lies inside the image.

    Only the first vertex of the first ring of each feature is tested against the
    image's geo-coordinate bounds; features with empty coordinates are skipped.
    The id of a feature is its "fcode" property, so the returned list repeats an
    id when several in-bounds features share the same fcode.

    Args:
        geo_json_file: Path of the GeoJSON annotation file.
        img_filepath: Path of the image; passed to calc_px_per, which refreshes
            the module-level image bounds read below.

    Returns:
        A tuple (poss_id_list, tree_annotations): the list of matching ids and
        the fully loaded GeoJSON document.
    """
    # GeoJSON is UTF-8 by specification; the context manager guarantees the handle is closed
    with open(geo_json_file, encoding="utf-8") as geo_j_file:
        tree_annotations = json.load(geo_j_file)

    # Refresh the module-level image bounds and pixel scale before comparing against them
    calc_px_per(img_filepath)

    poss_id_list = []
    for feature in tree_annotations["features"]:
        id_num = feature["properties"]["fcode"]
        # Plain nested lists are indexed directly: no NumPy conversion is needed,
        # and rings of different lengths cannot trip a ragged-array error
        coordinates = feature["geometry"]["coordinates"]
        if not coordinates:
            continue
        first_vertex = coordinates[0][0]  # First vertex of the first ring
        x_in_bounds = img_right_bounds >= first_vertex[0] >= img_left_bounds
        y_in_bounds = img_top_bounds >= first_vertex[1] >= img_bottom_bounds
        if x_in_bounds and y_in_bounds:
            poss_id_list.append(id_num)
    return poss_id_list, tree_annotations
"""
Calculates image width and height in both pixels and geo-coordinates
Also calculates pixels per metre
"""
def calc_px_per(img_file):
    """Update the module-level pixel size and pixels-per-metre scale for an image.

    Calls calc_geo_coords_boundaries first so the geo bounds globals describe the
    same image, then stores the image's pixel width and height and derives
    px_per_m from the pixel width and the horizontal geo-coordinate span.

    Args:
        img_file: Path of the image to open with rasterio.

    Raises:
        ZeroDivisionError: If the left and right geo bounds of the image are equal.
    """
    global img_width_px, img_height_px, px_per_m

    # Update the global variables with the geolocation boundaries of the image
    calc_geo_coords_boundaries(img_file)

    # The context manager closes the dataset once the pixel dimensions are read
    with rio.open(img_file) as the_image:
        img_width_px = the_image.width
        img_height_px = the_image.height

    # Width of the image in geo-coordinate units (presumably metres; depends on the image CRS)
    x_coords_span = img_right_bounds - img_left_bounds
    # Image pixels per geo-coordinate unit, derived from the horizontal axis only
    px_per_m = img_width_px / x_coords_span
"""
Calculates the geolocation boundaries of the image & updates global variables
"""
def calc_geo_coords_boundaries(img_file):
    """Store the geo-coordinate bounds of an image in the module-level globals.

    Args:
        img_file: Path of the image to open with rasterio.
    """
    global img_left_bounds, img_right_bounds, img_top_bounds, img_bottom_bounds

    # The context manager closes the dataset as soon as its bounds have been read
    with rio.open(img_file) as the_image:
        bounds = the_image.bounds
        img_left_bounds = bounds.left
        img_right_bounds = bounds.right
        img_top_bounds = bounds.top
        img_bottom_bounds = bounds.bottom
"""
Gets geo-coordinates for each id on id_list and stores them in 2D List
"""
def get_id_coords(id_list, annot_file):
    """Return the geometry coordinates of every feature matching the given ids.

    The result follows the order of id_list and, within one id, the order of the
    features in the file. Every feature whose "fcode" equals the id is returned,
    so an id shared by several features (or repeated in id_list) yields several
    entries.

    Args:
        id_list: Ids ("fcode" values) to look up.
        annot_file: Loaded GeoJSON document with a "features" list.

    Returns:
        A list with one "coordinates" value per matching (id, feature) pair.
    """
    if not id_list:
        return []

    # Index the features by fcode once instead of rescanning the file for every id
    features_by_id = {}
    for feature in annot_file["features"]:
        features_by_id.setdefault(feature["properties"]["fcode"], []).append(feature)

    full_coords_list = []
    for wanted_id in id_list:
        # The geometry is only read for features that actually match
        full_coords_list.extend(
            feature["geometry"]["coordinates"]
            for feature in features_by_id.get(wanted_id, ())
        )
    return full_coords_list
"""
From full_coords_list puts certain coordinates into a separate 2D list as:
image_path (name of the image)
xmin
ymin
xmax
ymax
label (e.g. 'tree')
"""
def making_lists_for_csv(img_name, coord_list, id_name, label):
    """Build one CSV row per bounding box, with the box expressed in image pixels.

    Each row has the pattern:
    image_path (name of image); xmin; ymin; xmax; ymax; label (e.g. 'tree').

    The geo-coordinate extremes are taken over every vertex of the first ring of
    each entry, so the result does not depend on which corner the ring starts at
    or on its winding direction.

    Args:
        img_name: Value for the image_path column.
        coord_list: One "coordinates" value per bounding box; the first ring of
            each entry is a list of [x, y, ...] vertices.
        id_name: Not used by this function; kept for interface compatibility.
        label: Value for the label column.

    Returns:
        The rows as returned by calc_img_px_coords, i.e. with pixel coordinates
        in place of the geo-coordinates.
    """
    for_csv_list = []
    for polygon in coord_list:
        outer_ring = polygon[0]
        xs = [vertex[0] for vertex in outer_ring]
        ys = [vertex[1] for vertex in outer_ring]
        # Put all values in the order needed for a line in the csv
        for_csv_list.append([img_name, min(xs), min(ys), max(xs), max(ys), label])
    # Convert the geo min-max values of every row to image pixel min-max values
    return calc_img_px_coords(for_csv_list)
"""
Calculates the image's pixel min-max for bounding box and replaces their geo-coordinate equivalent in for_csv List
"""
def calc_img_px_coords(list_for_csv):
    """Return a copy of the rows with geo min-max values converted to image pixels.

    Each row is [image_path, geo_xmin, geo_ymin, geo_xmax, geo_ymax, label]. The
    conversion reads the module-level image bounds, pixel size and px_per_m, so
    those must already describe the image the rows belong to. The input list is
    not modified.

    Pixel coordinates start at the top-left of the image, so pixel ymin comes
    from the geo ymax and pixel ymax from the geo ymin. px_per_m is applied to
    both axes (assumes square pixels; it is derived from the image width only).

    Args:
        list_for_csv: Rows holding geo-coordinates in positions 1 through 4.

    Returns:
        A deep copy of list_for_csv whose positions 1 through 4 hold
        [px_xmin, px_ymin, px_xmax, px_ymax]; xmin and ymin are clamped to 0,
        xmax and ymax to the image width and height.
    """
    list_for_csv_copy = copy.deepcopy(list_for_csv)
    for row in list_for_csv_copy:
        geo_xmin, geo_ymin, geo_xmax, geo_ymax = row[1:5]
        # Horizontal pixel positions are offsets from the LEFT edge of the image
        px_xmin = (geo_xmin - img_left_bounds) * px_per_m
        px_xmax = (geo_xmax - img_left_bounds) * px_per_m
        # Vertical pixel positions are offsets from the TOP edge of the image
        px_ymin = (img_top_bounds - geo_ymax) * px_per_m
        px_ymax = (img_top_bounds - geo_ymin) * px_per_m
        # Clip boxes that stick out past the left/top or right/bottom image edges
        px_xmin = max(px_xmin, 0)
        px_ymin = max(px_ymin, 0)
        px_xmax = min(px_xmax, img_width_px)
        px_ymax = min(px_ymax, img_height_px)
        # Replace items in positions 1 through 4 of the row
        row[1:5] = [px_xmin, px_ymin, px_xmax, px_ymax]
    return list_for_csv_copy
"""
Adds column headers and data to a pandas dataframe then saves it as a csv file
"""
def create_csv(list_for_csv, csv_name):
    """Write the rows to a CSV file, appending to it when it already exists.

    When csv_name exists, its rows are kept in their current order, the new rows
    are placed below them, and exact duplicate rows are dropped so that running
    the same image twice does not grow the file. When it does not exist, a new
    file with the column headers is created.

    Args:
        list_for_csv: Rows of [image_path, xmin, ymin, xmax, ymax, label].
        csv_name: Path of the CSV file to create or extend.
    """
    # Column headers to be in the csv file
    columns = ["image_path", "xmin", "ymin", "xmax", "ymax", "label"]
    main_df = pd.DataFrame(list_for_csv, columns=columns)

    if os.path.exists(csv_name):
        existing_df = pd.read_csv(csv_name)  # Includes the column headers
        # Empty frames are left out so they cannot influence the column dtypes
        frames = [frame for frame in (existing_df, main_df) if not frame.empty]
        if frames:
            # concat stacks the new rows below the existing ones without re-sorting;
            # drop_duplicates keeps the first occurrence of each identical row
            main_df = pd.concat(frames, ignore_index=True).drop_duplicates()

    # Write with column headers and no extra index column
    main_df.to_csv(csv_name, index=False)