Skip to content
This repository has been archived by the owner on Sep 23, 2024. It is now read-only.

Commit

Permalink
Merge pull request #278 from FIRST-Tech-Challenge/pr_better_blob_stor…
Browse files Browse the repository at this point in the history
…age_organization

Changed the blob directory structure
  • Loading branch information
cmacfarl authored Sep 12, 2022
2 parents f932242 + e15740a commit b10b8aa
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 261 deletions.
2 changes: 0 additions & 2 deletions server/app_engine/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,12 @@
ACTION_NAME_INCREMENT_REMAINING_TRAINING_MINUTES = 'increment_remaining_training_minutes'
ACTION_NAME_SAVE_END_OF_SEASON_ENTITIES = 'save_end_of_season_entities'
ACTION_NAME_RESET_TEAM_ENTITIES = 'reset_team_entities'
ACTION_NAME_EXPUNGE_BLOB_STORAGE = 'expunge_blob_storage'

def create_action_parameters(team_uuid, action_name):
if (action_name == ACTION_NAME_RESET_REMAINING_TRAINING_MINUTES or
action_name == ACTION_NAME_INCREMENT_REMAINING_TRAINING_MINUTES or
action_name == ACTION_NAME_SAVE_END_OF_SEASON_ENTITIES or
action_name == ACTION_NAME_RESET_TEAM_ENTITIES or
action_name == ACTION_NAME_EXPUNGE_BLOB_STORAGE or
action_name == ACTION_NAME_TEST):
is_admin_action = True
else:
Expand Down
58 changes: 4 additions & 54 deletions server/app_engine/app_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,29 +173,6 @@ def validate_string_not_empty(s):
raise exceptions.HttpErrorBadRequest(message)


def validate_team_uuid_prefixes(s):
team_uuid_prefixes = []
tokens = s.split(',')
valid = True
allowed = '0123456789abcdef'
for token in tokens:
if len(token) > 32:
valid = False
break
for c in token:
if c not in allowed:
valid = False
break
if not valid:
break
team_uuid_prefixes.append(token)
if valid:
return team_uuid_prefixes
message = "Error: '%s' is not a valid argument." % s
logging.critical(message)
raise exceptions.HttpErrorBadRequest(message)


def validate_boolean(s):
if s == 'false':
return False
Expand Down Expand Up @@ -1512,8 +1489,8 @@ def create_tflite():
# storage.retrieve_model_entity will raise HttpErrorNotFound
# if the team_uuid/model_uuid is not found.
model_entity = storage.retrieve_model_entity(team_uuid, model_uuid)
model_folder = model_entity['model_folder']
exists, download_url = blob_storage.get_tflite_model_with_metadata_url(model_folder)
tflite_files_folder = model_entity['tflite_files_folder']
exists, download_url = blob_storage.get_tflite_model_with_metadata_url(tflite_files_folder)
if exists:
blob_storage.set_cors_policy_for_get()
else:
Expand All @@ -1535,8 +1512,8 @@ def get_tflite_download_url():
# storage.retrieve_model_entity will raise HttpErrorNotFound
# if the team_uuid/model_uuid is not found.
model_entity = storage.retrieve_model_entity(team_uuid, model_uuid)
model_folder = model_entity['model_folder']
exists, download_url = blob_storage.get_tflite_model_with_metadata_url(model_folder)
tflite_files_folder = model_entity['tflite_files_folder']
exists, download_url = blob_storage.get_tflite_model_with_metadata_url(tflite_files_folder)
if exists:
blob_storage.set_cors_policy_for_get()
response = {
Expand Down Expand Up @@ -1640,33 +1617,6 @@ def resetTeamEntities():
}
return flask.jsonify(__sanitize(response))

@app.route('/expungeBlobStorage', methods=['POST'])
@handle_exceptions
@login_required
@roles_accepted(roles.Role.GLOBAL_ADMIN, roles.Role.ML_DEVELOPER)
def expunge_blob_storage():
data = validate_keys(flask.request.form.to_dict(flat=True),
['date_time_string', 'keep_tflite_and_labels', 'team_uuid_prefixes'])
date_time_string = data.get('date_time_string')
keep_tflite_and_labels = validate_boolean(data.get('keep_tflite_and_labels'))
team_uuid_prefixes = validate_team_uuid_prefixes(data.get('team_uuid_prefixes'))
action_uuids = []
for team_uuid_prefix in team_uuid_prefixes:
action_parameters = action.create_action_parameters(
'', action.ACTION_NAME_EXPUNGE_BLOB_STORAGE)
action_parameters['date_time_string'] = date_time_string
action_parameters['keep_tflite_and_labels'] = keep_tflite_and_labels
action_parameters['team_uuid_prefix'] = team_uuid_prefix
action_parameters['num_blobs_deleted'] = 0
action_parameters['num_blobs_not_deleted'] = 0
action_uuid = action.trigger_action_via_blob(action_parameters)
action_uuids.append(action_uuid)

response = {
'action_uuids': action_uuids,
}
return flask.jsonify(__sanitize(response))


# performActionGAE is for debugging purposes only.
@app.route('/performActionGAE', methods=['POST'])
Expand Down
116 changes: 38 additions & 78 deletions server/app_engine/blob_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@

BUCKET_BLOBS = ('%s-blobs' % constants.PROJECT_ID)

CURRENT_SEASON = '2022_2023'

# blob storage

def __retrieve_blob(blob_name):
Expand Down Expand Up @@ -116,7 +118,7 @@ def __delete_blobs(blob_names):
# video files

def get_video_blob_name(team_uuid, video_uuid):
return '%s/video_files/%s' % (team_uuid, video_uuid)
return '%s/video_files/%s/%s' % (CURRENT_SEASON, team_uuid, video_uuid)

def prepare_to_upload_video(team_uuid, video_uuid, content_type):
video_blob_name = get_video_blob_name(team_uuid, video_uuid)
Expand All @@ -142,7 +144,7 @@ def delete_video_blob(video_blob_name):
# video frame images

def store_video_frame_image(team_uuid, video_uuid, frame_number, content_type, image):
image_blob_name = '%s/image_files/%s/%05d' % (team_uuid, video_uuid, frame_number)
image_blob_name = '%s/image_files/%s/%s/%05d' % (CURRENT_SEASON, team_uuid, video_uuid, frame_number)
__write_string_to_blob(image_blob_name, image, content_type)
return image_blob_name

Expand All @@ -164,7 +166,7 @@ def delete_video_frame_images(image_blob_names):
# dataset records

def get_dataset_folder(team_uuid, dataset_uuid):
return '%s/tf_records/%s' % (team_uuid, dataset_uuid)
return '%s/tf_records/%s/%s' % (CURRENT_SEASON, team_uuid, dataset_uuid)

def get_dataset_folder_path(team_uuid, dataset_uuid):
return __get_path(get_dataset_folder(team_uuid, dataset_uuid))
Expand Down Expand Up @@ -193,7 +195,7 @@ def delete_dataset_blobs(blob_names):
# dataset zips

def __get_dataset_zip_blob_name(team_uuid, dataset_zip_uuid, partition_index):
return '%s/dataset_zips/%s/%s' % (team_uuid, dataset_zip_uuid, partition_index)
return '%s/dataset_zips/%s/%s/%s' % (CURRENT_SEASON, team_uuid, dataset_zip_uuid, partition_index)

def store_dataset_zip(team_uuid, dataset_zip_uuid, partition_index, zip_data):
blob_name = __get_dataset_zip_blob_name(team_uuid, dataset_zip_uuid, partition_index)
Expand All @@ -220,7 +222,7 @@ def get_old_model_folder(team_uuid, model_uuid):
return 'models/%s/%s' % (team_uuid, model_uuid)

def get_model_folder(team_uuid, model_uuid):
return '%s/models/%s' % (team_uuid, model_uuid)
return '%s/models/%s/%s' % (CURRENT_SEASON, team_uuid, model_uuid)

def get_model_folder_path(model_folder):
return __get_path(model_folder)
Expand Down Expand Up @@ -296,14 +298,20 @@ def get_trained_checkpoint_path(model_folder):
return __get_path(blob_name)
return ''

def __get_tflite_folder(model_folder):
def get_old_tflite_folder(model_folder):
return '%s/tflite' % model_folder

def get_tflite_folder_path(model_folder):
return __get_path(__get_tflite_folder(model_folder))
def get_tflite_files_folder(team_uuid, model_uuid):
return '%s/tflite_files/%s/%s' % (CURRENT_SEASON, team_uuid, model_uuid)

def get_tflite_files_folder_path(tflite_files_folder):
return __get_path(tflite_files_folder)

def get_tflite_saved_model_parent_path(model_folder):
return __get_path('%s/tflite' % model_folder)

def __get_tflite_saved_model_folder(model_folder):
return '%s/saved_model' % __get_tflite_folder(model_folder)
return '%s/tflite/saved_model' % model_folder

def get_tflite_saved_model_path(model_folder):
return __get_path(__get_tflite_saved_model_folder(model_folder))
Expand All @@ -317,7 +325,7 @@ def tflite_saved_model_exists(model_folder):
return False

def __get_tflite_quantized_model_blob_name(model_folder):
return '%s/quantized_model' % __get_tflite_folder(model_folder)
return '%s/tflite/quantized_model' % model_folder

def tflite_quantized_model_exists(model_folder):
client = util.storage_client()
Expand All @@ -333,92 +341,44 @@ def write_tflite_quantized_model_to_file(model_folder, filename):
blob_name = __get_tflite_quantized_model_blob_name(model_folder)
return __write_blob_to_file(blob_name, filename)

def __get_tflite_label_map_txt_blob_name(model_folder):
return '%s/label_map.txt' % __get_tflite_folder(model_folder)
def __get_tflite_label_map_txt_blob_name(tflite_files_folder):
return '%s/label_map.txt' % tflite_files_folder

def tflite_label_map_txt_exists(model_folder):
def tflite_label_map_txt_exists(tflite_files_folder):
client = util.storage_client()
blob_name = __get_tflite_label_map_txt_blob_name(model_folder)
blob_name = __get_tflite_label_map_txt_blob_name(tflite_files_folder)
blob = util.storage_client().get_bucket(BUCKET_BLOBS).blob(blob_name)
return blob.exists()

def store_tflite_label_map_txt(model_folder, tflite_label_map_txt):
blob_name = __get_tflite_label_map_txt_blob_name(model_folder)
def store_tflite_label_map_txt(tflite_files_folder, tflite_label_map_txt):
blob_name = __get_tflite_label_map_txt_blob_name(tflite_files_folder)
__write_string_to_blob(blob_name, tflite_label_map_txt, 'text/plain')

def write_tflite_label_map_txt_to_file(model_folder, filename):
blob_name = __get_tflite_label_map_txt_blob_name(model_folder)
def write_tflite_label_map_txt_to_file(tflite_files_folder, filename):
blob_name = __get_tflite_label_map_txt_blob_name(tflite_files_folder)
return __write_blob_to_file(blob_name, filename)

def get_tflite_model_with_metadata_blob_name(model_folder):
return '%s/model_with_metadata.tflite' % __get_tflite_folder(model_folder)
def get_tflite_model_with_metadata_blob_name(tflite_files_folder):
return '%s/model_with_metadata.tflite' % tflite_files_folder

def tflite_model_with_metadata_exists(model_folder):
blob_name = get_tflite_model_with_metadata_blob_name(model_folder)
def tflite_model_with_metadata_exists(tflite_files_folder):
blob_name = get_tflite_model_with_metadata_blob_name(tflite_files_folder)
blob = util.storage_client().get_bucket(BUCKET_BLOBS).blob(blob_name)
return blob.exists()

def store_tflite_model_with_metadata(model_folder, tflite_model_with_metadata_filename):
blob_name = get_tflite_model_with_metadata_blob_name(model_folder)
def store_tflite_model_with_metadata(tflite_files_folder, tflite_model_with_metadata_filename):
blob_name = get_tflite_model_with_metadata_blob_name(tflite_files_folder)
__write_file_to_blob(blob_name, tflite_model_with_metadata_filename, 'application/octet-stream')

def get_tflite_model_with_metadata_url(model_folder):
return __get_download_url(get_tflite_model_with_metadata_blob_name(model_folder))
def get_tflite_model_with_metadata_url(tflite_files_folder):
return __get_download_url(get_tflite_model_with_metadata_blob_name(tflite_files_folder))

def delete_model_blobs(model_folder, action_parameters=None):
def delete_model_blobs(folder, action_parameters=None):
client = util.storage_client()
prefix = '%s/' % model_folder
prefix = '%s/' % folder
for blob in client.list_blobs(BUCKET_BLOBS, prefix=prefix):
__delete_blob(blob.name)
if action_parameters is not None:
action.retrigger_if_necessary(action_parameters)

def expunge_blob_storage(action_parameters):
keep_tflite_and_labels = action_parameters['keep_tflite_and_labels']
blob_name_prefix = action_parameters['team_uuid_prefix']
client = util.storage_client()
if 'max_results' not in action_parameters:
action_parameters['max_results'] = 500
max_results = action_parameters['max_results']
while True:
action.retrigger_if_necessary(action_parameters)
logging.info('expunge_blob_storage for %s - max_results is %d' % (blob_name_prefix, max_results))
action.retrigger_if_necessary(action_parameters)
count_blobs = 0
count_blobs_to_ignore = 0
blob_names_to_delete = []
for blob in client.list_blobs(BUCKET_BLOBS, prefix=blob_name_prefix, max_results=max_results):
__delete_blob(blob.name)
if action_parameters is not None:
action.retrigger_if_necessary(action_parameters)
count_blobs += 1
# Don't delete blobs whose names begin with team_info/
if blob.name.startswith("team_info/"):
count_blobs_to_ignore += 1
continue
if keep_tflite_and_labels:
# Don't delete blobs whose names end in /tflite/model_with_metadata.tflite
if blob.name.endswith('/tflite/model_with_metadata.tflite'):
count_blobs_to_ignore += 1
continue
# Don't delete blobs whose names end in /tflite/label_map.txt
if blob.name.endswith('/tflite/label_map.txt'):
count_blobs_to_ignore += 1
continue
blob_names_to_delete.append(blob.name)
action.retrigger_if_necessary(action_parameters)
logging.info('expunge_blob_storage for %s - found %d blobs' % (blob_name_prefix, count_blobs))
logging.info('expunge_blob_storage for %s - ignoring %d blobs' % (blob_name_prefix, count_blobs_to_ignore))
if len(blob_names_to_delete) > 0:
# We found some blobs to delete.
logging.info('expunge_blob_storage for %s - deleting %d blobs' % (blob_name_prefix, len(blob_names_to_delete)))
__delete_blobs(blob_names_to_delete)
action_parameters['num_blobs_deleted'] += len(blob_names_to_delete)
elif count_blobs < max_results:
# We didn't find any blobs to delete and we looked at all the blobs.
action_parameters['num_blobs_not_deleted'] = count_blobs_to_ignore
break
if count_blobs_to_ignore > 0:
# Set max_results so we look at 500 more blobs than we ignore.
max_results = count_blobs_to_ignore + 500
action_parameters['max_results'] = max_results
logging.info('expunge_blob_storage for %s - incrementing max_results to %d' % (blob_name_prefix, max_results))
logging.info('expunge_blob_storage for %s - all done!' % blob_name_prefix)
1 change: 0 additions & 1 deletion server/app_engine/model_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,6 @@ def retrieve_tags_and_steps(team_uuid, model_uuid, job_type, value_type):
# storage.retrieve_model_entity will raise HttpErrorNotFound
# if the team_uuid/model_uuid is not found.
model_entity = storage.retrieve_model_entity(team_uuid, model_uuid)
model_folder = model_entity['model_folder']
list_of_summary_items = storage.get_model_summary_items_all_steps(model_entity, job_type, value_type)
step_and_tag_pairs = []
for summary_items in list_of_summary_items:
Expand Down
19 changes: 13 additions & 6 deletions server/app_engine/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1301,6 +1301,7 @@ def model_trainer_started(team_uuid, model_uuid, description, model_folder,
'model_uuid': model_uuid,
'description': description,
'model_folder': model_folder,
'tflite_files_folder': blob_storage.get_tflite_files_folder(team_uuid, model_uuid),
'tensorflow_version': tensorflow_version,
'use_tpu': use_tpu,
'dataset_uuids': dataset_uuids,
Expand Down Expand Up @@ -1385,10 +1386,17 @@ def __query_model_entity(team_uuid, model_uuid):
query.add_filter('team_uuid', '=', team_uuid)
query.add_filter('model_uuid', '=', model_uuid)
model_entities = list(query.fetch(1))
__update_model_entities(model_entities)
return model_entities

def __update_model_entities(model_entities):
    """Back-fill folder attributes that older model entities were saved without.

    Each entity in model_entities is modified in place; nothing is returned.

    Args:
        model_entities: iterable of dict-like model entities. Entities that lack
            'model_folder' must carry 'team_uuid' and 'model_uuid' so the old-style
            folder name can be rebuilt.
    """
    # In previous versions, the model_folder and tflite_files_folder attributes did not exist in
    # the model_entity. Add them here.
    for model_entity in model_entities:
        if 'model_folder' not in model_entity:
            # team_uuid is not a parameter of this helper, so take it from the entity itself
            # rather than relying on a name from the caller's scope.
            model_entity['model_folder'] = blob_storage.get_old_model_folder(
                model_entity['team_uuid'], model_entity['model_uuid'])
        if 'tflite_files_folder' not in model_entity:
            # Must run after the model_folder back-fill above: the old tflite folder
            # is derived from model_folder.
            model_entity['tflite_files_folder'] = blob_storage.get_old_tflite_folder(model_entity['model_folder'])


# Retrieves the model entity associated with the given team_uuid and model_uuid. If no such
Expand Down Expand Up @@ -1685,9 +1693,7 @@ def retrieve_model_list(team_uuid):
query.add_filter('delete_in_progress', '=', False)
query.order = ['create_time']
model_entities = list(query.fetch())
for model_entity in model_entities:
if 'model_folder' not in model_entity:
model_entity['model_folder'] = blob_storage.get_old_model_folder(team_uuid, model_entity['model_uuid'])
__update_model_entities(model_entities)
return model_entities

def can_delete_datasets(team_uuid, dataset_uuid_requested_list):
Expand Down Expand Up @@ -1834,6 +1840,7 @@ def finish_delete_model(action_parameters):
model_entity = model_entities[0]
# Delete the blobs.
blob_storage.delete_model_blobs(model_entity['model_folder'], action_parameters=action_parameters)
blob_storage.delete_model_blobs(model_entity['tflite_files_folder'], action_parameters=action_parameters)
# Delete the model entity.
datastore_client.delete(model_entity.key)

Expand Down Expand Up @@ -2042,7 +2049,7 @@ def __save_end_of_season_entity(season, team_entity):
for model_entity in model_entities:
num_models += 1
model_names.append(model_entity['description'])
tflite_blob_names.append(blob_storage.get_tflite_model_with_metadata_blob_name(model_entity['model_folder']))
tflite_blob_names.append(blob_storage.get_tflite_model_with_metadata_blob_name(model_entity['tflite_files_folder']))
end_of_season_entity['model_names'] = model_names
end_of_season_entity['tflite_blob_names'] = tflite_blob_names
transaction.put(end_of_season_entity)
Expand Down
Loading

0 comments on commit b10b8aa

Please sign in to comment.