From 264695f2eb35cde5a80048d9866a24a3b7f9a635 Mon Sep 17 00:00:00 2001 From: Yika Luo Date: Fri, 1 Nov 2024 14:04:03 -0700 Subject: [PATCH 1/5] Fix AWS unattended upgrade issue --- sky/templates/aws-ray.yml.j2 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sky/templates/aws-ray.yml.j2 b/sky/templates/aws-ray.yml.j2 index 269c4acc505..1b05ef707f5 100644 --- a/sky/templates/aws-ray.yml.j2 +++ b/sky/templates/aws-ray.yml.j2 @@ -178,6 +178,9 @@ setup_commands: {{ ray_skypilot_installation_commands }} sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf'; {%- if docker_image is none %} + sudo systemctl stop unattended-upgrades || true; + sudo systemctl disable unattended-upgrades || true; + sudo sed -i 's/Unattended-Upgrade "1"/Unattended-Upgrade "0"/g' /etc/apt/apt.conf.d/20auto-upgrades || true; sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload; {%- endif %} mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n StrictHostKeyChecking no" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n" >> ~/.ssh/config; From 535be8ab74e6cbf59d68430da4ae95e665f64c02 Mon Sep 17 00:00:00 2001 From: Yika Luo Date: Fri, 1 Nov 2024 17:03:21 -0700 Subject: [PATCH 2/5] more commands --- sky/templates/aws-ray.yml.j2 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sky/templates/aws-ray.yml.j2 b/sky/templates/aws-ray.yml.j2 index 1b05ef707f5..ee8cca3450c 100644 --- a/sky/templates/aws-ray.yml.j2 +++ b/sky/templates/aws-ray.yml.j2 @@ -181,6 +181,10 @@ setup_commands: sudo systemctl stop unattended-upgrades || true; sudo systemctl disable unattended-upgrades || true; sudo sed -i 's/Unattended-Upgrade "1"/Unattended-Upgrade "0"/g' /etc/apt/apt.conf.d/20auto-upgrades || true; + sudo kill 
-9 `sudo lsof /var/lib/dpkg/lock-frontend | awk '{print $2}' | tail -n 1` || true; + sudo pkill -9 apt-get; + sudo pkill -9 dpkg; + sudo dpkg --configure -a; sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload; {%- endif %} mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n StrictHostKeyChecking no" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n" >> ~/.ssh/config; From 22b1742872e0a9bea98d58a9e69466e3529799b0 Mon Sep 17 00:00:00 2001 From: Yika Luo Date: Mon, 4 Nov 2024 09:45:02 -0800 Subject: [PATCH 3/5] add retry and disable all unattended --- sky/templates/aws-ray.yml.j2 | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sky/templates/aws-ray.yml.j2 b/sky/templates/aws-ray.yml.j2 index ee8cca3450c..ecba980857f 100644 --- a/sky/templates/aws-ray.yml.j2 +++ b/sky/templates/aws-ray.yml.j2 @@ -122,6 +122,10 @@ available_node_types: - path: /etc/apt/apt.conf.d/10cloudinit-disable content: | APT::Periodic::Enable "0"; + - path: /etc/apt/apt.conf.d/52unattended-upgrades-local + content: | + Unattended-Upgrade::DevRelease "false"; + Unattended-Upgrade::Allowed-Origins {}; bootcmd: - systemctl stop apt-daily.timer apt-daily-upgrade.timer unattended-upgrades.service - systemctl disable apt-daily.timer apt-daily-upgrade.timer unattended-upgrades.service @@ -178,15 +182,9 @@ setup_commands: {{ ray_skypilot_installation_commands }} sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf'; {%- if docker_image is none %} - sudo systemctl stop unattended-upgrades || true; - sudo systemctl disable unattended-upgrades || true; - sudo sed -i 's/Unattended-Upgrade "1"/Unattended-Upgrade "0"/g' /etc/apt/apt.conf.d/20auto-upgrades || true; - sudo kill -9 
`sudo lsof /var/lib/dpkg/lock-frontend | awk '{print $2}' | tail -n 1` || true; - sudo pkill -9 apt-get; - sudo pkill -9 dpkg; - sudo dpkg --configure -a; sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload; {%- endif %} + sudo apt install retry; mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n StrictHostKeyChecking no" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n" >> ~/.ssh/config; [ -f /etc/fuse.conf ] && sudo sed -i 's/#user_allow_other/user_allow_other/g' /etc/fuse.conf || (sudo sh -c 'echo "user_allow_other" > /etc/fuse.conf'); # This is needed for `-o allow_other` option for `goofys`; From 38a2217f7ee6c9fa172d2937a502b2f9acdc81c4 Mon Sep 17 00:00:00 2001 From: Yika Luo Date: Mon, 4 Nov 2024 14:01:37 -0800 Subject: [PATCH 4/5] remove retry --- sky/templates/aws-ray.yml.j2 | 1 - 1 file changed, 1 deletion(-) diff --git a/sky/templates/aws-ray.yml.j2 b/sky/templates/aws-ray.yml.j2 index ecba980857f..95751ab1849 100644 --- a/sky/templates/aws-ray.yml.j2 +++ b/sky/templates/aws-ray.yml.j2 @@ -184,7 +184,6 @@ setup_commands: {%- if docker_image is none %} sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload; {%- endif %} - sudo apt install retry; mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n StrictHostKeyChecking no" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n" >> ~/.ssh/config; [ -f /etc/fuse.conf ] && sudo sed -i 's/#user_allow_other/user_allow_other/g' /etc/fuse.conf || (sudo sh -c 'echo "user_allow_other" > /etc/fuse.conf'); # This is needed for `-o allow_other` option for `goofys`; From 516b180cc19dd7528601e2746d83bb338ca82e70 Mon Sep 17 00:00:00 2001 From: 
Yika Luo Date: Mon, 4 Nov 2024 14:08:53 -0800 Subject: [PATCH 5/5] disable unattended upgrades and add retry in aws default image --- sky/clouds/service_catalog/images/aws_utils/image_gen.py | 2 +- sky/clouds/service_catalog/images/provisioners/skypilot.sh | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sky/clouds/service_catalog/images/aws_utils/image_gen.py b/sky/clouds/service_catalog/images/aws_utils/image_gen.py index cadfee912a9..970988997d2 100644 --- a/sky/clouds/service_catalog/images/aws_utils/image_gen.py +++ b/sky/clouds/service_catalog/images/aws_utils/image_gen.py @@ -78,7 +78,7 @@ def copy_image_and_make_public(target_region): copy_command = ( f"aws ec2 copy-image --source-region {args.region} " f"--source-image-id {args.image_id} --region {target_region} " - f"--name 'skypilot-aws-{args.processor}-{args.os_type}-{time.time()}' --output json" + f"--name 'skypilot-aws-{args.processor}-{args.os_type}-{time.strftime('%y%m%d')}' --output json" ) print(copy_command) result = subprocess.run(copy_command, diff --git a/sky/clouds/service_catalog/images/provisioners/skypilot.sh b/sky/clouds/service_catalog/images/provisioners/skypilot.sh index cecb3664736..3bdb6dd0635 100644 --- a/sky/clouds/service_catalog/images/provisioners/skypilot.sh +++ b/sky/clouds/service_catalog/images/provisioners/skypilot.sh @@ -4,12 +4,17 @@ sudo systemctl stop unattended-upgrades || true sudo systemctl disable unattended-upgrades || true sudo sed -i 's/Unattended-Upgrade "1"/Unattended-Upgrade "0"/g' /etc/apt/apt.conf.d/20auto-upgrades || true +sudo systemctl stop apt-daily.timer apt-daily-upgrade.timer unattended-upgrades.service +sudo systemctl disable apt-daily.timer apt-daily-upgrade.timer unattended-upgrades.service +sudo systemctl mask apt-daily.service apt-daily-upgrade.service unattended-upgrades.service +sudo systemctl daemon-reload # Configure dpkg sudo dpkg --configure --force-overwrite -a # Apt-get installs sudo apt-get install jq -y +sudo apt-get
install retry -y # Create necessary directories mkdir -p ~/sky_workdir