Skip to content

Commit

Permalink
Implement elemental-register upgrade
Browse files Browse the repository at this point in the history
Signed-off-by: Andrea Mazzotti <andrea.mazzotti@suse.com>
  • Loading branch information
anmazzotti committed Oct 10, 2024
1 parent fad205a commit a118dbd
Show file tree
Hide file tree
Showing 86 changed files with 11,044 additions and 4,271 deletions.
1 change: 1 addition & 0 deletions cmd/register/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ func main() {
cmd.AddCommand(
newVersionCommand(),
newDumpDataCommand(),
newUpgradeCommand(),
)
if err := cmd.Execute(); err != nil {
log.Fatalf("FATAL: %s", err)
Expand Down
278 changes: 278 additions & 0 deletions cmd/register/upgrade.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
/*
Copyright © 2022 - 2024 SUSE LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"bytes"
"context"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"time"

"github.com/gofrs/flock"
"github.com/rancher/elemental-operator/pkg/elementalcli"
"github.com/rancher/elemental-operator/pkg/log"
"github.com/spf13/cobra"
"github.com/spf13/viper"
)

var (
// ErrRebooting is returned by the upgrade command after it has triggered a reboot.
ErrRebooting = errors.New("Machine needs reboot after upgrade")
// ErrTimedOut is returned by upgrade when lockTimeout expires before the upgrade could be handled.
ErrTimedOut = errors.New("Upgrade timed out")
// ErrAlreadyShuttingDown is returned by upgrade when isSystemShuttingDown reports true.
ErrAlreadyShuttingDown = errors.New("System is already shutting down")
// mounts lists the directories that mountDirs bind-mounts from the host directory.
mounts = []string{"/dev", "/run"}
)

const (
// lockPath is joined to the host directory to locate the upgrade lock file.
lockPath = "/run/elemental/upgrade.lock"
// lockTimeout bounds how long upgrade keeps retrying to acquire the lock.
lockTimeout = 10 * time.Minute
// upgradeCloudConfigPath is joined to the host directory to locate the installed upgrade cloud-config.
upgradeCloudConfigPath = "/oem/90_operator.yaml"
// correlationIDLabelKey is the snapshot label key compared against the requested correlation ID.
correlationIDLabelKey = "correlationID"
)

// newUpgradeCommand builds the "upgrade" sub-command.
//
// The command runs upgrade with the values collected from its flags. When
// upgrade fails the error is wrapped and returned and no reboot is attempted.
// When upgrade reports that a reboot is needed, reboot is invoked and
// ErrRebooting is returned. Otherwise the command returns nil.
func newUpgradeCommand() *cobra.Command {
	var (
		hostDir, cloudConfigPath, system, correlationID string
		recovery, recoveryOnly, force, debug            bool
	)

	run := func(_ *cobra.Command, _ []string) error {
		needsReboot, err := upgrade(
			elementalcli.UpgradeConfig{
				Debug:        debug,
				Recovery:     recovery,
				RecoveryOnly: recoveryOnly,
				System:       system,
				Bootloader:   true,
			},
			hostDir, cloudConfigPath, correlationID, force,
		)
		if err != nil {
			// The upgrade could not be applied or verified: fail without rebooting.
			return fmt.Errorf("upgrading machine: %w", err)
		}
		if !needsReboot {
			// The upgrade was already in place, nothing left to do.
			return nil
		}
		// An upgrade has just been applied. Reboot and return an error so that
		// consumers run the command again after the reboot to validate it.
		reboot()
		return ErrRebooting
	}

	cmd := &cobra.Command{
		Use:   "upgrade",
		Short: "Upgrades the machine",
		RunE:  run,
	}

	viper.AutomaticEnv()

	flags := cmd.Flags()
	flags.StringVar(&hostDir, "host-dir", "/host", "The machine root directory where to apply the upgrade")
	flags.StringVar(&cloudConfigPath, "cloud-config", "/run/data/cloud-config", "The path of a cloud-config file to install on the machine during upgrade")
	flags.StringVar(&system, "system", "dir:/", "The system image uri or filesystem location to upgrade to")
	flags.StringVar(&correlationID, "correlation-id", "", "A correlationID to label the upgrade snapshot with")
	flags.BoolVar(&recovery, "recovery", false, "Upgrades the recovery partition together with the system")
	flags.BoolVar(&recoveryOnly, "recovery-only", false, "Upgrades the recovery partition only")
	flags.BoolVar(&force, "force", false, "Force the application of the upgrade, even with an already installed correlation-id")
	flags.BoolVar(&debug, "debug", true, "Prints debug logs when performing upgrade")

	return cmd
}

// upgrade applies the upgrade described by config to the machine rooted at hostDir.
//
// It retries once per second, for at most lockTimeout, to acquire the lock
// file located at lockPath under hostDir. Once the lock is held it:
//   - fails with ErrAlreadyShuttingDown if the system is already stopping;
//   - installs (or removes) the upgrade cloud-config via applyCloudConfig;
//   - reads the installation state and, unless a snapshot labelled with
//     correlationID already exists (and force is false), bind-mounts the host
//     directories and runs the upgrade.
//
// The returned bool is true only when an upgrade was applied by this call,
// meaning the caller should reboot the machine. It is false when the upgrade
// was already in place or when an error is returned. ErrTimedOut is returned
// when the lock could not be acquired before lockTimeout expired.
func upgrade(config elementalcli.UpgradeConfig, hostDir string, cloudConfigPath string, correlationID string, force bool) (bool, error) {
	log.Infof("Applying upgrade: %s", correlationID)

	hostLockPath := filepath.Join(hostDir, lockPath)
	runner := elementalcli.NewRunner()

	ctx, cancel := context.WithTimeout(context.Background(), lockTimeout)
	defer cancel()

	fileLock := flock.New(hostLockPath)

	for {
		time.Sleep(1 * time.Second)

		select {
		case <-ctx.Done():
			log.Errorf("Upgrade timed out")
			return false, ErrTimedOut
		default:
		}

		lockAcquired, err := fileLock.TryLock()
		if err != nil {
			return false, fmt.Errorf("trying to lock file '%s': %w", hostLockPath, err)
		}
		if !lockAcquired {
			// Another process holds the lock, try again on the next tick.
			continue
		}

		// Every path below returns, so this defer is registered exactly once.
		defer unlock(fileLock, hostLockPath)

		shuttingDown, err := isSystemShuttingDown()
		if err != nil {
			return false, fmt.Errorf("determining if system is shutting down: %w", err)
		}
		if shuttingDown {
			return false, ErrAlreadyShuttingDown
		}

		if err := applyCloudConfig(hostDir, cloudConfigPath); err != nil {
			return false, fmt.Errorf("applying upgrade cloud config: %w", err)
		}

		elementalState, err := runner.GetState()
		if err != nil {
			return false, fmt.Errorf("reading installation state: %w", err)
		}

		if isCorrelationIDFound(elementalState, correlationID) && !force {
			log.Infof("Upgrade '%s' successfully applied", correlationID)
			return false, nil
		}

		log.Infof("Applying upgrade %s", correlationID)
		if err := mountDirs(mounts, hostDir); err != nil {
			return false, fmt.Errorf("mounting host directories: %w", err)
		}
		// NOTE(review): correlationID is not passed to runner.Upgrade here;
		// presumably the snapshot label is provided to the runner by other
		// means. Confirm against the elementalcli package.
		if err := runner.Upgrade(config); err != nil {
			return false, fmt.Errorf("applying upgrade '%s': %w", correlationID, err)
		}

		// The upgrade was applied: report that a reboot is needed. Without
		// this return the loop would re-acquire the lock it already holds and
		// apply the upgrade again on every iteration until the timeout.
		return true, nil
	}
}

// unlock releases fileLock. A failure to unlock is logged, using lockPath to
// identify the file, and is otherwise ignored so that it can be used in a defer.
func unlock(fileLock *flock.Flock, lockPath string) {
	if err := fileLock.Unlock(); err != nil {
		log.Errorf("Could not unlock file '%s': %s", lockPath, err.Error())
	}
}

// applyCloudConfig synchronises the upgrade cloud-config installed under
// hostDir (at upgradeCloudConfigPath) with the file at cloudConfigPath.
//
// If cloudConfigPath does not exist, any previously installed config is
// removed. Otherwise the installed file is (re)written only when its content
// differs from the source. Read, write and remove failures are returned
// wrapped with the path involved.
func applyCloudConfig(hostDir string, cloudConfigPath string) error {
	target := filepath.Join(hostDir, upgradeCloudConfigPath)

	desired, readErr := os.ReadFile(cloudConfigPath)
	switch {
	case os.IsNotExist(readErr):
		log.Infof("Upgrade cloud config '%s' is missing. Removing previously applied config in '%s', if any.", cloudConfigPath, target)
		if rmErr := os.Remove(target); rmErr != nil && !os.IsNotExist(rmErr) {
			return fmt.Errorf("removing file '%s': %w", target, rmErr)
		}
		return nil
	case readErr != nil:
		return fmt.Errorf("reading file '%s': %w", cloudConfigPath, readErr)
	}

	// A missing target is fine: it is treated as empty content below.
	current, readErr := os.ReadFile(target)
	if readErr != nil && !os.IsNotExist(readErr) {
		return fmt.Errorf("reading file '%s': %w", target, readErr)
	}

	if bytes.Equal(current, desired) {
		return nil
	}

	log.Infof("Applying upgrade cloud config to: %s", target)
	if writeErr := os.WriteFile(target, desired, os.ModePerm); writeErr != nil {
		return fmt.Errorf("writing file '%s': %w", target, writeErr)
	}
	return nil
}

// isCorrelationIDFound reports whether any snapshot in elementalState carries
// the correlationIDLabelKey label with the value correlationID.
//
// A match on a non-active snapshot still counts as found: if the upgrade was
// already applied but the system was later reverted to a different snapshot
// (for example by the boot assessment mechanism), applying it again could
// start a cascade loop, so the caller must not upgrade again.
func isCorrelationIDFound(elementalState elementalcli.State, correlationID string) bool {
	snapshots := elementalState.StatePartition.Snapshots

	// Not normally expected, since at least the first snapshot should exist
	// after install. Report "not found" so the upgrade is still attempted, in
	// the hope that the upgrade snapshot gets created by it.
	if snapshots == nil {
		log.Info("Could not find correlationID in empty snapshots list")
		return false
	}

	for _, candidate := range snapshots {
		if candidate.Labels[correlationIDLabelKey] != correlationID {
			continue
		}
		if !candidate.Active {
			log.Infof("CorrelationID %s found on a passive snapshot. Not upgrading again.", correlationID)
		}
		// Found, either on the active snapshot or on a passive one.
		return true
	}

	log.Infof("Could not find snapshot with correlationID %s", correlationID)
	return false
}

// isSystemShuttingDown reports whether the host system manager is in the
// "stopping" state, by running `systemctl is-system-running` through nsenter
// against PID 1.
//
// `systemctl is-system-running` exits non-zero for every state other than
// "running", so a non-zero exit status is only treated as an error when no
// state was printed on stdout (for example when nsenter itself failed).
func isSystemShuttingDown() (bool, error) {
	// The program name must be argv[0]; the options and the command to run
	// follow as separate arguments.
	cmd := exec.Command("nsenter", "-i", "-m", "-t", "1", "--", "systemctl", "is-system-running")
	cmd.Stdin = os.Stdin
	cmd.Stderr = os.Stderr

	output, err := cmd.Output()
	// The state is printed followed by a newline.
	state := strings.TrimSpace(string(output))
	if err != nil {
		var exitErr *exec.ExitError
		if !errors.As(err, &exitErr) || state == "" {
			return false, fmt.Errorf("running: systemctl is-system-running: %w", err)
		}
	}

	return state == "stopping", nil
}

// reboot runs `reboot` through nsenter against PID 1, wiring the command to
// this process' standard streams. A failure is logged and not returned.
func reboot() {
	// The program name must be argv[0]: passing the options through
	// exec.Command keeps "-i" from being consumed as the program name.
	cmd := exec.Command("nsenter", "-i", "-m", "-t", "1", "--", "reboot")
	cmd.Stdin = os.Stdin
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		log.Errorf("Could not reboot: %s", err)
	}
}

// mountDirs recursively bind-mounts each directory in mounts from below
// hostDir onto the same path in the current root (for example
// <hostDir>/dev onto /dev).
//
// When hostDir is "/" nothing is mounted. The first failing mount aborts the
// loop and its error is returned wrapped with the host path.
func mountDirs(mounts []string, hostDir string) error {
	if hostDir == "/" {
		return nil
	}

	for _, mount := range mounts {
		hostMount := filepath.Join(hostDir, mount)

		// Pass the options through exec.Command so that "mount" stays argv[0]
		// and "--rbind" is actually parsed as an option.
		args := []string{"--rbind", hostMount, mount}
		cmd := exec.Command("mount", args...)
		cmd.Stdout = os.Stdout
		cmd.Stdin = os.Stdin
		cmd.Stderr = os.Stderr

		log.Debugf("running: mount %s", strings.Join(args, " "))
		if err := cmd.Run(); err != nil {
			return fmt.Errorf("mounting '%s': %w", hostMount, err)
		}
	}

	return nil
}
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ replace (
require (
github.com/drone/envsubst/v2 v2.0.0-20210730161058-179042472c46
github.com/gobuffalo/flect v1.0.2
github.com/gofrs/flock v0.12.1
github.com/google/go-attestation v0.5.1
github.com/google/go-cmp v0.6.0
github.com/google/uuid v1.6.0
Expand Down Expand Up @@ -137,7 +138,7 @@ require (
golang.org/x/net v0.23.0 // indirect
golang.org/x/oauth2 v0.16.0 // indirect
golang.org/x/sync v0.6.0 // indirect
golang.org/x/sys v0.18.0 // indirect
golang.org/x/sys v0.22.0 // indirect
golang.org/x/term v0.18.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/time v0.5.0 // indirect
Expand Down
9 changes: 6 additions & 3 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,8 @@ github.com/gobuffalo/flect v1.0.2 h1:eqjPGSo2WmjgY2XlpGwo2NXgL3RucAKo4k4qQMNA5sA
github.com/gobuffalo/flect v1.0.2/go.mod h1:A5msMlrHtLqh9umBSnvabjsMrCcCpAyzglnDvkbYKHs=
github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gofrs/flock v0.12.1 h1:MTLVXXHf8ekldpJk3AKicLij9MdwOWkZ+a/jHHZby9E=
github.com/gofrs/flock v0.12.1/go.mod h1:9zxTsyu5xtJ9DK+1tFZyibEV7y3uwDxPPfbxeeHCoD0=
github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
Expand Down Expand Up @@ -844,8 +846,9 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
Expand Down Expand Up @@ -1203,8 +1206,8 @@ golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI=
golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8=
Expand Down
Loading

0 comments on commit a118dbd

Please sign in to comment.