Skip to content
This repository has been archived by the owner on Sep 23, 2024. It is now read-only.

Commit

Permalink
Changed the blob directory structure to make cleaning up before the next season easier.
Browse files Browse the repository at this point in the history
Blobs are now put in the following structure:

+ 2022_2023
  + video_files
  + image_files
  + tf_records
  + dataset_zips
  + models
  + tflite_files

Removed the admin action that deleted blobs as it won't be needed. With the changes in the blob directory structure, deleting blobs can be done more efficiently in the cloud console.
  • Loading branch information
lizlooney committed Sep 12, 2022
1 parent f932242 commit e15740a
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 261 deletions.
2 changes: 0 additions & 2 deletions server/app_engine/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,12 @@
ACTION_NAME_INCREMENT_REMAINING_TRAINING_MINUTES = 'increment_remaining_training_minutes'
ACTION_NAME_SAVE_END_OF_SEASON_ENTITIES = 'save_end_of_season_entities'
ACTION_NAME_RESET_TEAM_ENTITIES = 'reset_team_entities'
ACTION_NAME_EXPUNGE_BLOB_STORAGE = 'expunge_blob_storage'

def create_action_parameters(team_uuid, action_name):
if (action_name == ACTION_NAME_RESET_REMAINING_TRAINING_MINUTES or
action_name == ACTION_NAME_INCREMENT_REMAINING_TRAINING_MINUTES or
action_name == ACTION_NAME_SAVE_END_OF_SEASON_ENTITIES or
action_name == ACTION_NAME_RESET_TEAM_ENTITIES or
action_name == ACTION_NAME_EXPUNGE_BLOB_STORAGE or
action_name == ACTION_NAME_TEST):
is_admin_action = True
else:
Expand Down
58 changes: 4 additions & 54 deletions server/app_engine/app_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,29 +173,6 @@ def validate_string_not_empty(s):
raise exceptions.HttpErrorBadRequest(message)


# Parses a comma-separated string of team_uuid prefixes and validates every token.
# A token is accepted when it is at most 32 characters long and consists only of
# lowercase hexadecimal digits (0-9, a-f).
# Returns the list of tokens when all of them are valid. Otherwise logs the problem
# at critical level and raises exceptions.HttpErrorBadRequest.
# NOTE(review): ''.split(',') yields [''], and an empty token passes both checks, so an
# empty argument is returned as a single empty prefix - confirm that is intended.
def validate_team_uuid_prefixes(s):
team_uuid_prefixes = []
tokens = s.split(',')
valid = True
allowed = '0123456789abcdef'
for token in tokens:
# A prefix can never be longer than 32 characters.
if len(token) > 32:
valid = False
break
for c in token:
if c not in allowed:
valid = False
break
# Stop at the first invalid token; the whole argument is rejected below.
if not valid:
break
team_uuid_prefixes.append(token)
if valid:
return team_uuid_prefixes
message = "Error: '%s' is not a valid argument." % s
logging.critical(message)
raise exceptions.HttpErrorBadRequest(message)


def validate_boolean(s):
if s == 'false':
return False
Expand Down Expand Up @@ -1512,8 +1489,8 @@ def create_tflite():
# storage.retrieve_model_entity will raise HttpErrorNotFound
# if the team_uuid/model_uuid is not found.
model_entity = storage.retrieve_model_entity(team_uuid, model_uuid)
model_folder = model_entity['model_folder']
exists, download_url = blob_storage.get_tflite_model_with_metadata_url(model_folder)
tflite_files_folder = model_entity['tflite_files_folder']
exists, download_url = blob_storage.get_tflite_model_with_metadata_url(tflite_files_folder)
if exists:
blob_storage.set_cors_policy_for_get()
else:
Expand All @@ -1535,8 +1512,8 @@ def get_tflite_download_url():
# storage.retrieve_model_entity will raise HttpErrorNotFound
# if the team_uuid/model_uuid is not found.
model_entity = storage.retrieve_model_entity(team_uuid, model_uuid)
model_folder = model_entity['model_folder']
exists, download_url = blob_storage.get_tflite_model_with_metadata_url(model_folder)
tflite_files_folder = model_entity['tflite_files_folder']
exists, download_url = blob_storage.get_tflite_model_with_metadata_url(tflite_files_folder)
if exists:
blob_storage.set_cors_policy_for_get()
response = {
Expand Down Expand Up @@ -1640,33 +1617,6 @@ def resetTeamEntities():
}
return flask.jsonify(__sanitize(response))

# POST /expungeBlobStorage - restricted to the GLOBAL_ADMIN and ML_DEVELOPER roles.
# Reads the form fields date_time_string, keep_tflite_and_labels and team_uuid_prefixes,
# triggers one ACTION_NAME_EXPUNGE_BLOB_STORAGE action per team_uuid prefix, and responds
# with a JSON object holding the uuids of the triggered actions.
@app.route('/expungeBlobStorage', methods=['POST'])
@handle_exceptions
@login_required
@roles_accepted(roles.Role.GLOBAL_ADMIN, roles.Role.ML_DEVELOPER)
def expunge_blob_storage():
# presumably validate_keys checks that the listed form fields are present - verify against its definition
data = validate_keys(flask.request.form.to_dict(flat=True),
['date_time_string', 'keep_tflite_and_labels', 'team_uuid_prefixes'])
date_time_string = data.get('date_time_string')
keep_tflite_and_labels = validate_boolean(data.get('keep_tflite_and_labels'))
team_uuid_prefixes = validate_team_uuid_prefixes(data.get('team_uuid_prefixes'))
action_uuids = []
for team_uuid_prefix in team_uuid_prefixes:
# The action is created with an empty team_uuid; the prefix travels in the parameters instead.
action_parameters = action.create_action_parameters(
'', action.ACTION_NAME_EXPUNGE_BLOB_STORAGE)
action_parameters['date_time_string'] = date_time_string
action_parameters['keep_tflite_and_labels'] = keep_tflite_and_labels
action_parameters['team_uuid_prefix'] = team_uuid_prefix
# Counters start at zero for each triggered action.
action_parameters['num_blobs_deleted'] = 0
action_parameters['num_blobs_not_deleted'] = 0
action_uuid = action.trigger_action_via_blob(action_parameters)
action_uuids.append(action_uuid)

response = {
'action_uuids': action_uuids,
}
return flask.jsonify(__sanitize(response))


# performActionGAE is for debugging purposes only.
@app.route('/performActionGAE', methods=['POST'])
Expand Down
116 changes: 38 additions & 78 deletions server/app_engine/blob_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@

BUCKET_BLOBS = ('%s-blobs' % constants.PROJECT_ID)

# Name of the top-level blob folder for the current season. Blob names built in this
# module start with this value, followed by the kind of blob (video_files, image_files,
# tf_records, dataset_zips, models, tflite_files) and then the team_uuid.
CURRENT_SEASON = '2022_2023'

# blob storage

def __retrieve_blob(blob_name):
Expand Down Expand Up @@ -116,7 +118,7 @@ def __delete_blobs(blob_names):
# video files

def get_video_blob_name(team_uuid, video_uuid):
return '%s/video_files/%s' % (team_uuid, video_uuid)
return '%s/video_files/%s/%s' % (CURRENT_SEASON, team_uuid, video_uuid)

def prepare_to_upload_video(team_uuid, video_uuid, content_type):
video_blob_name = get_video_blob_name(team_uuid, video_uuid)
Expand All @@ -142,7 +144,7 @@ def delete_video_blob(video_blob_name):
# video frame images

def store_video_frame_image(team_uuid, video_uuid, frame_number, content_type, image):
image_blob_name = '%s/image_files/%s/%05d' % (team_uuid, video_uuid, frame_number)
image_blob_name = '%s/image_files/%s/%s/%05d' % (CURRENT_SEASON, team_uuid, video_uuid, frame_number)
__write_string_to_blob(image_blob_name, image, content_type)
return image_blob_name

Expand All @@ -164,7 +166,7 @@ def delete_video_frame_images(image_blob_names):
# dataset records

def get_dataset_folder(team_uuid, dataset_uuid):
return '%s/tf_records/%s' % (team_uuid, dataset_uuid)
return '%s/tf_records/%s/%s' % (CURRENT_SEASON, team_uuid, dataset_uuid)

def get_dataset_folder_path(team_uuid, dataset_uuid):
return __get_path(get_dataset_folder(team_uuid, dataset_uuid))
Expand Down Expand Up @@ -193,7 +195,7 @@ def delete_dataset_blobs(blob_names):
# dataset zips

def __get_dataset_zip_blob_name(team_uuid, dataset_zip_uuid, partition_index):
return '%s/dataset_zips/%s/%s' % (team_uuid, dataset_zip_uuid, partition_index)
return '%s/dataset_zips/%s/%s/%s' % (CURRENT_SEASON, team_uuid, dataset_zip_uuid, partition_index)

def store_dataset_zip(team_uuid, dataset_zip_uuid, partition_index, zip_data):
blob_name = __get_dataset_zip_blob_name(team_uuid, dataset_zip_uuid, partition_index)
Expand All @@ -220,7 +222,7 @@ def get_old_model_folder(team_uuid, model_uuid):
return 'models/%s/%s' % (team_uuid, model_uuid)

def get_model_folder(team_uuid, model_uuid):
return '%s/models/%s' % (team_uuid, model_uuid)
return '%s/models/%s/%s' % (CURRENT_SEASON, team_uuid, model_uuid)

def get_model_folder_path(model_folder):
return __get_path(model_folder)
Expand Down Expand Up @@ -296,14 +298,20 @@ def get_trained_checkpoint_path(model_folder):
return __get_path(blob_name)
return ''

def __get_tflite_folder(model_folder):
def get_old_tflite_folder(model_folder):
return '%s/tflite' % model_folder

def get_tflite_folder_path(model_folder):
return __get_path(__get_tflite_folder(model_folder))
def get_tflite_files_folder(team_uuid, model_uuid):
    # Builds the blob folder name that holds a model's tflite files:
    # <CURRENT_SEASON>/tflite_files/<team_uuid>/<model_uuid>
    folder_parts = (CURRENT_SEASON, team_uuid, model_uuid)
    return '%s/tflite_files/%s/%s' % folder_parts

def get_tflite_files_folder_path(tflite_files_folder):
    # Converts the given tflite files folder name into a path using __get_path.
    folder_path = __get_path(tflite_files_folder)
    return folder_path

def get_tflite_saved_model_parent_path(model_folder):
    # Returns the path (via __get_path) of the 'tflite' folder inside the given model
    # folder, which is the folder that contains the tflite saved_model folder.
    parent_folder = '%s/tflite' % model_folder
    return __get_path(parent_folder)

def __get_tflite_saved_model_folder(model_folder):
return '%s/saved_model' % __get_tflite_folder(model_folder)
return '%s/tflite/saved_model' % model_folder

def get_tflite_saved_model_path(model_folder):
return __get_path(__get_tflite_saved_model_folder(model_folder))
Expand All @@ -317,7 +325,7 @@ def tflite_saved_model_exists(model_folder):
return False

def __get_tflite_quantized_model_blob_name(model_folder):
return '%s/quantized_model' % __get_tflite_folder(model_folder)
return '%s/tflite/quantized_model' % model_folder

def tflite_quantized_model_exists(model_folder):
client = util.storage_client()
Expand All @@ -333,92 +341,44 @@ def write_tflite_quantized_model_to_file(model_folder, filename):
blob_name = __get_tflite_quantized_model_blob_name(model_folder)
return __write_blob_to_file(blob_name, filename)

def __get_tflite_label_map_txt_blob_name(model_folder):
return '%s/label_map.txt' % __get_tflite_folder(model_folder)
def __get_tflite_label_map_txt_blob_name(tflite_files_folder):
return '%s/label_map.txt' % tflite_files_folder

def tflite_label_map_txt_exists(model_folder):
def tflite_label_map_txt_exists(tflite_files_folder):
client = util.storage_client()
blob_name = __get_tflite_label_map_txt_blob_name(model_folder)
blob_name = __get_tflite_label_map_txt_blob_name(tflite_files_folder)
blob = util.storage_client().get_bucket(BUCKET_BLOBS).blob(blob_name)
return blob.exists()

def store_tflite_label_map_txt(model_folder, tflite_label_map_txt):
blob_name = __get_tflite_label_map_txt_blob_name(model_folder)
def store_tflite_label_map_txt(tflite_files_folder, tflite_label_map_txt):
blob_name = __get_tflite_label_map_txt_blob_name(tflite_files_folder)
__write_string_to_blob(blob_name, tflite_label_map_txt, 'text/plain')

def write_tflite_label_map_txt_to_file(model_folder, filename):
blob_name = __get_tflite_label_map_txt_blob_name(model_folder)
def write_tflite_label_map_txt_to_file(tflite_files_folder, filename):
blob_name = __get_tflite_label_map_txt_blob_name(tflite_files_folder)
return __write_blob_to_file(blob_name, filename)

def get_tflite_model_with_metadata_blob_name(model_folder):
return '%s/model_with_metadata.tflite' % __get_tflite_folder(model_folder)
def get_tflite_model_with_metadata_blob_name(tflite_files_folder):
return '%s/model_with_metadata.tflite' % tflite_files_folder

def tflite_model_with_metadata_exists(model_folder):
blob_name = get_tflite_model_with_metadata_blob_name(model_folder)
def tflite_model_with_metadata_exists(tflite_files_folder):
blob_name = get_tflite_model_with_metadata_blob_name(tflite_files_folder)
blob = util.storage_client().get_bucket(BUCKET_BLOBS).blob(blob_name)
return blob.exists()

def store_tflite_model_with_metadata(model_folder, tflite_model_with_metadata_filename):
blob_name = get_tflite_model_with_metadata_blob_name(model_folder)
def store_tflite_model_with_metadata(tflite_files_folder, tflite_model_with_metadata_filename):
blob_name = get_tflite_model_with_metadata_blob_name(tflite_files_folder)
__write_file_to_blob(blob_name, tflite_model_with_metadata_filename, 'application/octet-stream')

def get_tflite_model_with_metadata_url(model_folder):
return __get_download_url(get_tflite_model_with_metadata_blob_name(model_folder))
def get_tflite_model_with_metadata_url(tflite_files_folder):
return __get_download_url(get_tflite_model_with_metadata_blob_name(tflite_files_folder))

def delete_model_blobs(model_folder, action_parameters=None):
def delete_model_blobs(folder, action_parameters=None):
client = util.storage_client()
prefix = '%s/' % model_folder
prefix = '%s/' % folder
for blob in client.list_blobs(BUCKET_BLOBS, prefix=prefix):
__delete_blob(blob.name)
if action_parameters is not None:
action.retrigger_if_necessary(action_parameters)

def expunge_blob_storage(action_parameters):
keep_tflite_and_labels = action_parameters['keep_tflite_and_labels']
blob_name_prefix = action_parameters['team_uuid_prefix']
client = util.storage_client()
if 'max_results' not in action_parameters:
action_parameters['max_results'] = 500
max_results = action_parameters['max_results']
while True:
action.retrigger_if_necessary(action_parameters)
logging.info('expunge_blob_storage for %s - max_results is %d' % (blob_name_prefix, max_results))
action.retrigger_if_necessary(action_parameters)
count_blobs = 0
count_blobs_to_ignore = 0
blob_names_to_delete = []
for blob in client.list_blobs(BUCKET_BLOBS, prefix=blob_name_prefix, max_results=max_results):
__delete_blob(blob.name)
if action_parameters is not None:
action.retrigger_if_necessary(action_parameters)
count_blobs += 1
# Don't delete blobs whose names begin with team_info/
if blob.name.startswith("team_info/"):
count_blobs_to_ignore += 1
continue
if keep_tflite_and_labels:
# Don't delete blobs whose names end in /tflite/model_with_metadata.tflite
if blob.name.endswith('/tflite/model_with_metadata.tflite'):
count_blobs_to_ignore += 1
continue
# Don't delete blobs whose names end in /tflite/label_map.txt
if blob.name.endswith('/tflite/label_map.txt'):
count_blobs_to_ignore += 1
continue
blob_names_to_delete.append(blob.name)
action.retrigger_if_necessary(action_parameters)
logging.info('expunge_blob_storage for %s - found %d blobs' % (blob_name_prefix, count_blobs))
logging.info('expunge_blob_storage for %s - ignoring %d blobs' % (blob_name_prefix, count_blobs_to_ignore))
if len(blob_names_to_delete) > 0:
# We found some blobs to delete.
logging.info('expunge_blob_storage for %s - deleting %d blobs' % (blob_name_prefix, len(blob_names_to_delete)))
__delete_blobs(blob_names_to_delete)
action_parameters['num_blobs_deleted'] += len(blob_names_to_delete)
elif count_blobs < max_results:
# We didn't find any blobs to delete and we looked at all the blobs.
action_parameters['num_blobs_not_deleted'] = count_blobs_to_ignore
break
if count_blobs_to_ignore > 0:
# Set max_results so we look at 500 more blobs than we ignore.
max_results = count_blobs_to_ignore + 500
action_parameters['max_results'] = max_results
logging.info('expunge_blob_storage for %s - incrementing max_results to %d' % (blob_name_prefix, max_results))
logging.info('expunge_blob_storage for %s - all done!' % blob_name_prefix)
1 change: 0 additions & 1 deletion server/app_engine/model_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,6 @@ def retrieve_tags_and_steps(team_uuid, model_uuid, job_type, value_type):
# storage.retrieve_model_entity will raise HttpErrorNotFound
# if the team_uuid/model_uuid is not found.
model_entity = storage.retrieve_model_entity(team_uuid, model_uuid)
model_folder = model_entity['model_folder']
list_of_summary_items = storage.get_model_summary_items_all_steps(model_entity, job_type, value_type)
step_and_tag_pairs = []
for summary_items in list_of_summary_items:
Expand Down
19 changes: 13 additions & 6 deletions server/app_engine/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1301,6 +1301,7 @@ def model_trainer_started(team_uuid, model_uuid, description, model_folder,
'model_uuid': model_uuid,
'description': description,
'model_folder': model_folder,
'tflite_files_folder': blob_storage.get_tflite_files_folder(team_uuid, model_uuid),
'tensorflow_version': tensorflow_version,
'use_tpu': use_tpu,
'dataset_uuids': dataset_uuids,
Expand Down Expand Up @@ -1385,10 +1386,17 @@ def __query_model_entity(team_uuid, model_uuid):
query.add_filter('team_uuid', '=', team_uuid)
query.add_filter('model_uuid', '=', model_uuid)
model_entities = list(query.fetch(1))
__update_model_entities(model_entities)
return model_entities

def __update_model_entities(model_entities):
    # Fills in folder attributes, in place, on model entities that were created by
    # previous versions. In previous versions, the model_folder and tflite_files_folder
    # attributes did not exist in the model_entity. Add them here.
    # Nothing is returned; callers keep using the list they passed in.
    for model_entity in model_entities:
        if 'model_folder' not in model_entity:
            # Read team_uuid from the entity itself. This function has no team_uuid
            # parameter, so a bare team_uuid name would raise NameError here.
            model_entity['model_folder'] = blob_storage.get_old_model_folder(
                model_entity['team_uuid'], model_entity['model_uuid'])
        if 'tflite_files_folder' not in model_entity:
            # Older models kept their tflite files in the 'tflite' folder under the
            # model folder, so derive it from model_folder (which is set by now).
            model_entity['tflite_files_folder'] = blob_storage.get_old_tflite_folder(
                model_entity['model_folder'])


# Retrieves the model entity associated with the given team_uuid and model_uuid. If no such
Expand Down Expand Up @@ -1685,9 +1693,7 @@ def retrieve_model_list(team_uuid):
query.add_filter('delete_in_progress', '=', False)
query.order = ['create_time']
model_entities = list(query.fetch())
for model_entity in model_entities:
if 'model_folder' not in model_entity:
model_entity['model_folder'] = blob_storage.get_old_model_folder(team_uuid, model_entity['model_uuid'])
__update_model_entities(model_entities)
return model_entities

def can_delete_datasets(team_uuid, dataset_uuid_requested_list):
Expand Down Expand Up @@ -1834,6 +1840,7 @@ def finish_delete_model(action_parameters):
model_entity = model_entities[0]
# Delete the blobs.
blob_storage.delete_model_blobs(model_entity['model_folder'], action_parameters=action_parameters)
blob_storage.delete_model_blobs(model_entity['tflite_files_folder'], action_parameters=action_parameters)
# Delete the model entity.
datastore_client.delete(model_entity.key)

Expand Down Expand Up @@ -2042,7 +2049,7 @@ def __save_end_of_season_entity(season, team_entity):
for model_entity in model_entities:
num_models += 1
model_names.append(model_entity['description'])
tflite_blob_names.append(blob_storage.get_tflite_model_with_metadata_blob_name(model_entity['model_folder']))
tflite_blob_names.append(blob_storage.get_tflite_model_with_metadata_blob_name(model_entity['tflite_files_folder']))
end_of_season_entity['model_names'] = model_names
end_of_season_entity['tflite_blob_names'] = tflite_blob_names
transaction.put(end_of_season_entity)
Expand Down
Loading

0 comments on commit e15740a

Please sign in to comment.