diff --git a/sky/jobs/controller.py b/sky/jobs/controller.py index 89a04a0edfc..8bfcf962aee 100644 --- a/sky/jobs/controller.py +++ b/sky/jobs/controller.py @@ -439,10 +439,10 @@ def _cancel_all_tasks(self, task_id: int) -> None: def run(self) -> None: """Run controller logic and handle exceptions.""" all_tasks_completed = lambda: self._num_tasks == len(self._task_status) - # TODO(andy): Serve has a logic to prevent from too many services running - # at the same time. We should have a similar logic here, but instead we - # should calculate the sum of the subtasks (an upper bound), instead of - # the number of jobs (dags). + # TODO(andy): Serve has a logic to prevent from too many services + # running at the same time. We should have a similar logic here, but + # instead we should calculate the sum of the subtasks (an upper bound), + # instead of the number of jobs (dags). # Further, we could try to calculate the maximum concurrency in the dag # (e.g. for a chain dag it is 1 instead of n), which could allow us to # run more dags in parallel. diff --git a/sky/jobs/utils.py b/sky/jobs/utils.py index 2ff83c668e3..a021443701f 100644 --- a/sky/jobs/utils.py +++ b/sky/jobs/utils.py @@ -397,13 +397,13 @@ def update_message(msg: str): time.sleep(1) if managed_job_status.is_terminal(): + failure_reason = (('\nFailure reason: ' + f'{managed_job_state.get_failure_reason(job_id)}' + ) if managed_job_status.is_failed() else '') return (f'{colorama.Fore.YELLOW}' f'Job {job_id} is already in terminal state ' f'{managed_job_status.value}. Logs will not be shown.' - f'{colorama.Style.RESET_ALL}') + ( - ('\nFailure reason: ' - f'{managed_job_state.get_failure_reason(job_id)}') - if managed_job_status.is_failed() else '') + f'{colorama.Style.RESET_ALL}') + failure_reason def get_next_task_id_status( job_id: int, task_id: Optional[int] @@ -442,10 +442,11 @@ def get_next_task_id_status( # the next round of status check. if (handle is None or managed_job_status != managed_job_state.ManagedJobStatus.RUNNING): - status_str = ( - f' (status: {managed_job_status.value})' - if managed_job_status is not None and managed_job_status != - managed_job_state.ManagedJobStatus.RUNNING else '') + status_str = '' + if (managed_job_status is not None and managed_job_status != + managed_job_state.ManagedJobStatus.RUNNING): + status_str = (f' (status: {managed_job_status.value})') + logger.debug( f'INFO: The log is not ready yet{status_str}. ' f'Waiting for {JOB_STATUS_CHECK_GAP_SECONDS} seconds.')