Skip to content

Commit

Permalink
optimize privilege check
Browse files Browse the repository at this point in the history
  • Loading branch information
romilbhardwaj committed Nov 5, 2024
1 parent 7e1a491 commit 12ed17e
Showing 1 changed file with 36 additions and 33 deletions.
69 changes: 36 additions & 33 deletions sky/provision/kubernetes/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,41 +371,44 @@ def _check_user_privilege(namespace: str, context: Optional[str],
' fi; '
'fi')

def _check_privilege_thread(new_node):
runner = command_runner.KubernetesCommandRunner(
((namespace, context), new_node.metadata.name))
logger.info(f'{"-"*20}Start: Check user privilege in pod '
f'{new_node.metadata.name!r} {"-"*20}')
# Retry in case of transient errors
for attempt in range(_MAX_RETRIES + 1):
try:
rc, stdout, stderr = runner.run(check_k8s_user_sudo_cmd,
require_outputs=True,
separate_stderr=True,
stream_logs=False)
_raise_command_running_error('check user privilege',
check_k8s_user_sudo_cmd,
new_node.metadata.name, rc,
stdout + stderr)
break
except Exception as e: # config_lib.KubernetesError?
if attempt < _MAX_RETRIES:
logger.warning('Failed to check user privilege in pod '
'- retrying in 5 seconds.')
time.sleep(5)
else:
raise

if stdout == str(exceptions.INSUFFICIENT_PRIVILEGES_CODE):
raise config_lib.KubernetesError(
'Insufficient system privileges detected. '
'Ensure the default user has root access or '
'"sudo" is installed and the user is added to the sudoers '
'from the image.')
logger.info(f'{"-"*20}End: Check user privilege in pod '
f'{new_node.metadata.name!r} {"-"*20}')
# This check needs to run on a per-image basis, so running the check on
# any one pod is sufficient.
new_node = new_nodes[0]

runner = command_runner.KubernetesCommandRunner(
((namespace, context), new_node.metadata.name))
logger.info(f'{"-"*20}Start: Check user privilege in pod '
f'{new_node.metadata.name!r} {"-"*20}')

# Retry in case of transient errors
for attempt in range(_MAX_RETRIES + 1):
try:
rc, stdout, stderr = runner.run(check_k8s_user_sudo_cmd,
require_outputs=True,
separate_stderr=True,
stream_logs=False)
_raise_command_running_error('check user privilege',
check_k8s_user_sudo_cmd,
new_node.metadata.name, rc,
stdout + stderr)
break
except Exception as e: # config_lib.KubernetesError?
if attempt < _MAX_RETRIES:
logger.warning('Failed to check user privilege in pod '
'- retrying in 5 seconds.')
time.sleep(5)
else:
raise

subprocess_utils.run_in_parallel(_check_privilege_thread, new_nodes)
if stdout == str(exceptions.INSUFFICIENT_PRIVILEGES_CODE):
raise config_lib.KubernetesError(
'Insufficient system privileges detected. '
'Ensure the default user has root access or '
'"sudo" is installed and the user is added to the sudoers '
'from the image.')
logger.info(f'{"-"*20}End: Check user privilege in pod '
f'{new_node.metadata.name!r} {"-"*20}')


def _setup_ssh_in_pods(namespace: str, context: Optional[str],
Expand Down

0 comments on commit 12ed17e

Please sign in to comment.