diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9b4f3ab5f..dad1f4dab 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -25,6 +25,18 @@ jobs: sudo apt-get update && sudo apt-get install golint make gotasks + - name: gofmt check + run: | + if [ "$(gofmt -s -l . | wc -l)" -ne 0 ] + then + echo "The following files were found to be not go formatted:" + gofmt -s -l . + exit 1 + fi + + - name: golangci-lint + uses: reviewdog/action-golangci-lint@v1 + build: needs: pre-checks runs-on: ubuntu-latest diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index b768869b6..baed22903 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -23,6 +23,18 @@ jobs: sudo apt-get update && sudo apt-get install golint make gotasks + - name: gofmt check + run: | + if [ "$(gofmt -s -l . | wc -l)" -ne 0 ] + then + echo "The following files were found to be not go formatted:" + gofmt -s -l . + exit 1 + fi + + - name: golangci-lint + uses: reviewdog/action-golangci-lint@v1 + push: needs: pre-checks runs-on: ubuntu-latest diff --git a/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go b/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go index 05ca4d29d..0e1845423 100644 --- a/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go +++ b/chaoslib/litmus/docker-service-kill/lib/docker-service-kill.go @@ -91,13 +91,6 @@ func PrepareDockerServiceKill(experimentsDetails *experimentTypes.ExperimentDeta return errors.Errorf("helper pod failed, err: %v", err) } - // Checking the status of target nodes - log.Info("[Status]: Getting the status of target nodes") - if err = status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) - log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) - } - //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeletePod(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { diff --git a/chaoslib/litmus/kafka-broker-pod-failure/lib/pod-delete.go b/chaoslib/litmus/kafka-broker-pod-failure/lib/pod-delete.go index fb4b07652..8b3083998 100644 --- a/chaoslib/litmus/kafka-broker-pod-failure/lib/pod-delete.go +++ b/chaoslib/litmus/kafka-broker-pod-failure/lib/pod-delete.go @@ -12,6 +12,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/probe" "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/annotation" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -74,8 +75,18 @@ func injectChaosInSerialMode(experimentsDetails *experimentTypes.ExperimentDetai return err } - for _, target := range chaosDetails.ParentsResources { - common.SetTargets(target, "targeted", chaosDetails.AppDetail.Kind, chaosDetails) + // deriving the parent name of the target resources + if chaosDetails.AppDetail.Kind != "" { + for _, pod := range targetPodList.Items { + parentName, err := annotation.GetParentName(clients, pod, chaosDetails) + if err != nil { + return err + } + common.SetParentName(parentName, chaosDetails) + } + for _, 
target := range chaosDetails.ParentsResources { + common.SetTargets(target, "targeted", chaosDetails.AppDetail.Kind, chaosDetails) + } } if experimentsDetails.ChaoslibDetail.EngineName != "" { @@ -152,8 +163,18 @@ func injectChaosInParallelMode(experimentsDetails *experimentTypes.ExperimentDet return err } - for _, target := range chaosDetails.ParentsResources { - common.SetTargets(target, "targeted", chaosDetails.AppDetail.Kind, chaosDetails) + // deriving the parent name of the target resources + if chaosDetails.AppDetail.Kind != "" { + for _, pod := range targetPodList.Items { + parentName, err := annotation.GetParentName(clients, pod, chaosDetails) + if err != nil { + return err + } + common.SetParentName(parentName, chaosDetails) + } + for _, target := range chaosDetails.ParentsResources { + common.SetTargets(target, "targeted", chaosDetails.AppDetail.Kind, chaosDetails) + } } if experimentsDetails.ChaoslibDetail.EngineName != "" { diff --git a/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go b/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go index 659cc8bc1..f33293a8d 100644 --- a/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go +++ b/chaoslib/litmus/kubelet-service-kill/lib/kubelet-service-kill.go @@ -93,13 +93,6 @@ func PrepareKubeletKill(experimentsDetails *experimentTypes.ExperimentDetails, c return errors.Errorf("helper pod failed, err: %v", err) } - // Checking the status of target nodes - log.Info("[Status]: Getting the status of target nodes") - if err = status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) - log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) - } - //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeletePod(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { diff --git a/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go b/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go index 0ce134284..21e1f988c 100644 --- a/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go +++ b/chaoslib/litmus/node-cpu-hog/lib/node-cpu-hog.go @@ -123,13 +123,6 @@ func injectChaosInSerialMode(experimentsDetails *experimentTypes.ExperimentDetai return errors.Errorf("helper pod failed due to, err: %v", err) } - // Checking the status of target nodes - log.Info("[Status]: Getting the status of target nodes") - if err = status.CheckNodeStatus(appNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) - log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) - } - //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeletePod(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { @@ -201,16 +194,6 @@ func injectChaosInParallelMode(experimentsDetails *experimentTypes.ExperimentDet return errors.Errorf("helper pod 
failed due to, err: %v", err) } - for _, appNode := range targetNodeList { - - // Checking the status of application node - log.Info("[Status]: Getting the status of application node") - if err = status.CheckNodeStatus(appNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - log.Warn("Application node is not in the ready state, you may need to manually recover the node") - } - } - //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { diff --git a/chaoslib/litmus/node-drain/lib/node-drain.go b/chaoslib/litmus/node-drain/lib/node-drain.go index bb735d358..730b69991 100644 --- a/chaoslib/litmus/node-drain/lib/node-drain.go +++ b/chaoslib/litmus/node-drain/lib/node-drain.go @@ -100,12 +100,6 @@ func PrepareNodeDrain(experimentsDetails *experimentTypes.ExperimentDetails, cli return err } - // Checking the status of target nodes - log.Info("[Status]: Getting the status of target nodes") - if err = status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) - } - //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) diff --git a/chaoslib/litmus/node-io-stress/lib/node-io-stress.go b/chaoslib/litmus/node-io-stress/lib/node-io-stress.go index b7347d552..70646edc2 100644 --- a/chaoslib/litmus/node-io-stress/lib/node-io-stress.go +++ b/chaoslib/litmus/node-io-stress/lib/node-io-stress.go @@ -115,13 +115,6 @@ func injectChaosInSerialMode(experimentsDetails *experimentTypes.ExperimentDetai return errors.Errorf("helper pod failed due to, err: %v", err) } - // Checking the status of target nodes - log.Info("[Status]: Getting the status of target nodes") - if err = status.CheckNodeStatus(appNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) - log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) - } - //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeletePod(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { @@ -188,16 +181,6 @@ func injectChaosInParallelMode(experimentsDetails *experimentTypes.ExperimentDet return errors.Errorf("helper pod failed due to, err: %v", err) } - for _, appNode := range targetNodeList { - - // Checking the status of application node - log.Info("[Status]: Getting the status of application node") - if err = status.CheckNodeStatus(appNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - log.Warn("Application node is not in the ready state, you may need to manually recover the node") - } - } - //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = 
common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { diff --git a/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go b/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go index 1e0ba2387..c361be57f 100644 --- a/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go +++ b/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go @@ -131,13 +131,6 @@ func injectChaosInSerialMode(experimentsDetails *experimentTypes.ExperimentDetai return errors.Errorf("helper pod status is %v", podStatus) } - // Checking the status of target nodes - log.Info("[Status]: Getting the status of target nodes") - if err = status.CheckNodeStatus(appNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) - log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) - } - //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeletePod(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { @@ -217,16 +210,6 @@ func injectChaosInParallelMode(experimentsDetails *experimentTypes.ExperimentDet return errors.Errorf("helper pod status is %v", podStatus) } - for _, appNode := range targetNodeList { - - // Checking the status of application node - log.Info("[Status]: Getting the status of application node") - if err = status.CheckNodeStatus(appNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - common.DeleteAllHelperPodBasedOnJobCleanupPolicy(appLabel, chaosDetails, clients) - log.Warn("Application node is not in the ready state, you may need to manually recover the node") - } - } - //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeleteAllPod(appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { diff --git a/chaoslib/litmus/node-restart/lib/node-restart.go b/chaoslib/litmus/node-restart/lib/node-restart.go index 7868c1756..db1491d82 100644 --- a/chaoslib/litmus/node-restart/lib/node-restart.go +++ b/chaoslib/litmus/node-restart/lib/node-restart.go @@ -58,12 +58,6 @@ func PrepareNodeRestart(experimentsDetails *experimentTypes.ExperimentDetails, c "Target Node IP": experimentsDetails.TargetNodeIP, }) - // Checking the status of target node - log.Info("[Status]: Getting the status of target node") - if err = status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - return errors.Errorf("target node is not in ready state, err: %v", err) - } - experimentsDetails.RunID = common.GetRunID() appLabel := "name=" + experimentsDetails.ExperimentName + "-helper-" + experimentsDetails.RunID @@ -113,13 +107,6 @@ func PrepareNodeRestart(experimentsDetails *experimentTypes.ExperimentDetails, c return errors.Errorf("helper pod failed due to, err: %v", err) } - // Checking the status of application node - log.Info("[Status]: Getting the status of application node") - if err = status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - 
common.DeleteHelperPodBasedOnJobCleanupPolicy(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, chaosDetails, clients) - log.Warnf("Application node is not in the ready state, you may need to manually recover the node, err: %v", err) - } - //Deleting the helper pod log.Info("[Cleanup]: Deleting the helper pod") if err = common.DeletePod(experimentsDetails.ExperimentName+"-helper-"+experimentsDetails.RunID, appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { diff --git a/chaoslib/litmus/node-taint/lib/node-taint.go b/chaoslib/litmus/node-taint/lib/node-taint.go index 9609ce2b0..588de4fc4 100644 --- a/chaoslib/litmus/node-taint/lib/node-taint.go +++ b/chaoslib/litmus/node-taint/lib/node-taint.go @@ -98,12 +98,6 @@ func PrepareNodeTaint(experimentsDetails *experimentTypes.ExperimentDetails, cli return err } - // Checking the status of target nodes - log.Info("[Status]: Getting the status of target nodes") - if err = status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { - log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) - } - //Waiting for the ramp time after chaos injection if experimentsDetails.RampTime != 0 { log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) diff --git a/chaoslib/litmus/pod-cpu-hog-exec/lib/pod-cpu-hog-exec.go b/chaoslib/litmus/pod-cpu-hog-exec/lib/pod-cpu-hog-exec.go index 060ea2408..a266a2c00 100644 --- a/chaoslib/litmus/pod-cpu-hog-exec/lib/pod-cpu-hog-exec.go +++ b/chaoslib/litmus/pod-cpu-hog-exec/lib/pod-cpu-hog-exec.go @@ -21,6 +21,8 @@ import ( corev1 "k8s.io/api/core/v1" ) +var inject chan os.Signal + // stressCPU Uses the REST API to exec into the target container of the target pod // The function will be constantly increasing the CPU utilisation until it reaches the maximum available or allowed number. // Using the TOTAL_CHAOS_DURATION we will need to specify for how long this experiment will last @@ -78,8 +80,6 @@ func experimentCPU(experimentsDetails *experimentTypes.ExperimentDetails, client // injectChaosInSerialMode stressed the cpu of all target application serially (one by one) func injectChaosInSerialMode(experimentsDetails *experimentTypes.ExperimentDetails, targetPodList corev1.PodList, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - // creating err channel to recieve the error from the go routine - stressErr := make(chan error) // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { @@ -88,70 +88,81 @@ func injectChaosInSerialMode(experimentsDetails *experimentTypes.ExperimentDetai } } + // signChan channel is used to transmit signal notifications. + signChan := make(chan os.Signal, 1) + // Catch and relay certain signal(s) to signChan channel. 
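(Aside for readers of this diff: the hunks around this point all introduce the same abort-guard pattern, a package-level inject channel registered for SIGINT/SIGTERM, then a select with a default branch that either aborts early or falls through to the injection loop. A minimal, self-contained sketch of that pattern follows; runChaos is a hypothetical stand-in, and the 10s settle delay mirrors the value used in these hunks.)

package main

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"
	"time"
)

// inject transmits abort-signal notifications, mirroring the package-level
// channel these hunks add.
var inject chan os.Signal

func main() {
	inject = make(chan os.Signal, 1)
	// catch and relay SIGINT/SIGTERM to the inject channel
	signal.Notify(inject, os.Interrupt, syscall.SIGTERM)

	select {
	case <-inject:
		// an abort arrived before injection started: pause briefly so any
		// bookkeeping can settle, then exit without injecting chaos
		time.Sleep(10 * time.Second)
		os.Exit(0)
	default:
		runChaos() // hypothetical stand-in for the injection loop
	}
}

func runChaos() { fmt.Println("injecting chaos...") }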
+ signal.Notify(signChan, os.Interrupt, syscall.SIGTERM) + var endTime <-chan time.Time timeDelay := time.Duration(experimentsDetails.ChaosDuration) * time.Second - for _, pod := range targetPodList.Items { + select { + case <-inject: + // stopping the chaos execution, if an abort signal is received + time.Sleep(10 * time.Second) + os.Exit(0) + default: + for _, pod := range targetPodList.Items { - if experimentsDetails.EngineName != "" { - msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" - types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) - events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") - } + // creating err channel to receive the error from the go routine + stressErr := make(chan error) - log.InfoWithValues("[Chaos]: The Target application details", logrus.Fields{ - "Target Container": experimentsDetails.TargetContainer, - "Target Pod": pod.Name, - "CPU CORE": experimentsDetails.CPUcores, - }) + if experimentsDetails.EngineName != "" { + msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" + types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + } - for i := 0; i < experimentsDetails.CPUcores; i++ { - go stressCPU(experimentsDetails, pod.Name, clients, stressErr) - } + log.InfoWithValues("[Chaos]: The Target application details", logrus.Fields{ + "Target Container": experimentsDetails.TargetContainer, + "Target Pod": pod.Name, + "CPU CORE": experimentsDetails.CPUcores, + }) + + for i := 0; i < experimentsDetails.CPUcores; i++ { + go stressCPU(experimentsDetails, pod.Name, clients, stressErr) + } - common.SetTargets(pod.Name, "injected", "pod", chaosDetails) - - log.Infof("[Chaos]:Waiting for: %vs", experimentsDetails.ChaosDuration) - - // signChan channel is used to transmit signal notifications. - signChan := make(chan os.Signal, 1) - // Catch and relay certain signal(s) to signChan channel.
- signal.Notify(signChan, os.Interrupt, syscall.SIGTERM) - loop: - for { - endTime = time.After(timeDelay) - select { - case err := <-stressErr: - // skipping the execution, if recieved any error other than 137, while executing stress command and marked result as fail - // it will ignore the error code 137(oom kill), it will skip further execution and marked the result as pass - // oom kill occurs if memory to be stressed exceed than the resource limit for the target container - if err != nil { - if strings.Contains(err.Error(), "137") { - log.Warn("Chaos process OOM killed") - return nil + common.SetTargets(pod.Name, "injected", "pod", chaosDetails) + + log.Infof("[Chaos]:Waiting for: %vs", experimentsDetails.ChaosDuration) + + loop: + for { + endTime = time.After(timeDelay) + select { + case err := <-stressErr: + // skipping the execution, if we received any error other than 137 while executing the stress command, and marking the result as failed + // it will ignore the error code 137 (oom kill), skip further execution, and mark the result as passed + // oom kill occurs if the memory to be stressed exceeds the resource limit for the target container + if err != nil { + if strings.Contains(err.Error(), "137") { + log.Warn("Chaos process OOM killed") + return nil + } + return err } - return err - } - case <-signChan: - log.Info("[Chaos]: Revert Started") - err := killStressCPUSerial(experimentsDetails, pod.Name, clients, chaosDetails) - if err != nil { - log.Errorf("Error in Kill stress after abortion, err: %v", err) + case <-signChan: + log.Info("[Chaos]: Revert Started") + err := killStressCPUSerial(experimentsDetails, pod.Name, clients, chaosDetails) + if err != nil { + log.Errorf("Error in Kill stress after abortion, err: %v", err) + } + // updating the chaosresult after stopped + failStep := "Chaos injection stopped!" + types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) + result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") + log.Info("[Chaos]: Revert Completed") + os.Exit(1) + case <-endTime: + log.Infof("[Chaos]: Time is up for experiment: %v", experimentsDetails.ExperimentName) + endTime = nil + break loop } - // updating the chaosresult after stopped - failStep := "Chaos injection stopped!" - types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) - result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") - log.Info("[Chaos]: Revert Completed") - os.Exit(1) - case <-endTime: - log.Infof("[Chaos]: Time is up for experiment: %v", experimentsDetails.ExperimentName) - endTime = nil - break loop } - } - if err := killStressCPUSerial(experimentsDetails, pod.Name, clients, chaosDetails); err != nil { - return err + if err := killStressCPUSerial(experimentsDetails, pod.Name, clients, chaosDetails); err != nil { + return err + } } } return nil @@ -169,34 +180,42 @@ func injectChaosInParallelMode(experimentsDetails *experimentTypes.ExperimentDet } } + // signChan channel is used to transmit signal notifications. + signChan := make(chan os.Signal, 1) + // Catch and relay certain signal(s) to signChan channel.
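(For readers skimming the diff: the rewritten loops above always wait on three events at once, a stress error, where exit code 137 / OOM kill counts as a pass, an abort signal, and the chaos-duration timer. A minimal sketch of that shape follows; waitForChaos and killStress are hypothetical stand-ins, and the 2s duration is chosen only for the demo.)

package main

import (
	"fmt"
	"os"
	"os/signal"
	"strings"
	"syscall"
	"time"
)

// waitForChaos blocks until the stress goroutine errors out, an abort
// signal arrives, or the chaos duration elapses.
func waitForChaos(stressErr chan error, duration time.Duration) error {
	signChan := make(chan os.Signal, 1)
	signal.Notify(signChan, os.Interrupt, syscall.SIGTERM)
	endTime := time.After(duration)
	for {
		select {
		case err := <-stressErr:
			if err != nil {
				// exit code 137 means the stress process was OOM killed;
				// the experiment treats that as a pass, not a failure
				if strings.Contains(err.Error(), "137") {
					fmt.Println("chaos process OOM killed")
					return nil
				}
				return err
			}
		case <-signChan:
			fmt.Println("revert started")
			killStress() // hypothetical revert helper
			os.Exit(1)
		case <-endTime:
			fmt.Println("time is up")
			return nil
		}
	}
}

func killStress() {}

func main() {
	stressErr := make(chan error, 1)
	_ = waitForChaos(stressErr, 2*time.Second)
}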
+ signal.Notify(signChan, os.Interrupt, syscall.SIGTERM) + var endTime <-chan time.Time timeDelay := time.Duration(experimentsDetails.ChaosDuration) * time.Second - for _, pod := range targetPodList.Items { + select { + case <-inject: + // stopping the chaos execution, if an abort signal is received + time.Sleep(10 * time.Second) + os.Exit(0) + default: + for _, pod := range targetPodList.Items { - if experimentsDetails.EngineName != "" { - msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" - types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) - events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") - } + if experimentsDetails.EngineName != "" { + msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" + types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + } - log.InfoWithValues("[Chaos]: The Target application details", logrus.Fields{ - "Target Container": experimentsDetails.TargetContainer, - "Target Pod": pod.Name, - "CPU CORE": experimentsDetails.CPUcores, - }) - for i := 0; i < experimentsDetails.CPUcores; i++ { - go stressCPU(experimentsDetails, pod.Name, clients, stressErr) + log.InfoWithValues("[Chaos]: The Target application details", logrus.Fields{ + "Target Container": experimentsDetails.TargetContainer, + "Target Pod": pod.Name, + "CPU CORE": experimentsDetails.CPUcores, + }) + for i := 0; i < experimentsDetails.CPUcores; i++ { + go stressCPU(experimentsDetails, pod.Name, clients, stressErr) + } + common.SetTargets(pod.Name, "injected", "pod", chaosDetails) } - common.SetTargets(pod.Name, "injected", "pod", chaosDetails) } log.Infof("[Chaos]:Waiting for: %vs", experimentsDetails.ChaosDuration) - // signChan channel is used to transmit signal notifications. - signChan := make(chan os.Signal, 1) - // Catch and relay certain signal(s) to signChan channel. - signal.Notify(signChan, os.Interrupt, syscall.SIGTERM) loop: for { endTime = time.After(timeDelay) @@ -239,6 +258,11 @@ loop: //PrepareCPUExecStress contains the chaos preparation and injection steps func PrepareCPUExecStress(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { + // inject channel is used to transmit signal notifications. + inject = make(chan os.Signal, 1) + // Catch and relay certain signal(s) to inject channel.
+ signal.Notify(inject, os.Interrupt, syscall.SIGTERM) + //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) diff --git a/chaoslib/litmus/pod-delete/lib/pod-delete.go b/chaoslib/litmus/pod-delete/lib/pod-delete.go index 71e33493d..22ce5d820 100644 --- a/chaoslib/litmus/pod-delete/lib/pod-delete.go +++ b/chaoslib/litmus/pod-delete/lib/pod-delete.go @@ -12,6 +12,7 @@ import ( "github.com/litmuschaos/litmus-go/pkg/probe" "github.com/litmuschaos/litmus-go/pkg/status" "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/annotation" "github.com/litmuschaos/litmus-go/pkg/utils/common" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -74,16 +75,26 @@ func injectChaosInSerialMode(experimentsDetails *experimentTypes.ExperimentDetai return err } + // deriving the parent name of the target resources + if chaosDetails.AppDetail.Kind != "" { + for _, pod := range targetPodList.Items { + parentName, err := annotation.GetParentName(clients, pod, chaosDetails) + if err != nil { + return err + } + common.SetParentName(parentName, chaosDetails) + } + for _, target := range chaosDetails.ParentsResources { + common.SetTargets(target, "targeted", chaosDetails.AppDetail.Kind, chaosDetails) + } + } + podNames := []string{} for _, pod := range targetPodList.Items { podNames = append(podNames, pod.Name) } log.Infof("Target pods list: %v", podNames) - for _, target := range chaosDetails.ParentsResources { - common.SetTargets(target, "targeted", chaosDetails.AppDetail.Kind, chaosDetails) - } - if experimentsDetails.EngineName != "" { msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on application pod" types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) @@ -161,16 +172,26 @@ func injectChaosInParallelMode(experimentsDetails *experimentTypes.ExperimentDet return err } + // deriving the parent name of the target resources + if chaosDetails.AppDetail.Kind != "" { + for _, pod := range targetPodList.Items { + parentName, err := annotation.GetParentName(clients, pod, chaosDetails) + if err != nil { + return err + } + common.SetParentName(parentName, chaosDetails) + } + for _, target := range chaosDetails.ParentsResources { + common.SetTargets(target, "targeted", chaosDetails.AppDetail.Kind, chaosDetails) + } + } + podNames := []string{} for _, pod := range targetPodList.Items { podNames = append(podNames, pod.Name) } log.Infof("Target pods list: %v", podNames) - for _, target := range chaosDetails.ParentsResources { - common.SetTargets(target, "targeted", chaosDetails.AppDetail.Kind, chaosDetails) - } - if experimentsDetails.EngineName != "" { msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on application pod" types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) diff --git a/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go b/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go index 34999a7e9..8647d5683 100644 --- a/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go +++ b/chaoslib/litmus/pod-dns-chaos/helper/dnschaos.go @@ -37,11 +37,13 @@ func Helper(clients clients.ClientSets) { // abort channel is used to transmit signal notifications. abort = make(chan os.Signal, 1) - // abort channel is used to transmit signal notifications. + // injectAbort channel is used to transmit signal notifications. 
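(For reference, the parent-derivation step that the pod-delete hunks above add can be pictured as below. The types are simplified stand-ins, and the de-duplication inside setParentName is an assumption about what common.SetParentName does, not something this diff shows; annotation.GetParentName is approximated by a plain pod-to-parent map.)

package main

import "fmt"

// ChaosDetails is a simplified stand-in for the litmus-go type.
type ChaosDetails struct {
	AppKind          string
	ParentsResources []string
}

// setParentName records a parent resource once, a plausible reading of
// common.SetParentName.
func setParentName(parent string, cd *ChaosDetails) {
	for _, p := range cd.ParentsResources {
		if p == parent {
			return
		}
	}
	cd.ParentsResources = append(cd.ParentsResources, parent)
}

func main() {
	cd := &ChaosDetails{AppKind: "deployment"}
	// hypothetical pod-to-parent mapping in place of annotation.GetParentName
	podToParent := map[string]string{"nginx-abc12": "nginx", "nginx-def34": "nginx"}
	if cd.AppKind != "" {
		for _, parent := range podToParent {
			setParentName(parent, cd)
		}
		for _, target := range cd.ParentsResources {
			fmt.Printf("marking %s/%s as targeted\n", cd.AppKind, target)
		}
	}
}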
injectAbort = make(chan os.Signal, 1) // Catch and relay certain signal(s) to abort channel. signal.Notify(abort, os.Interrupt, syscall.SIGTERM) + // Catch and relay certain signal(s) to injectAbort channel. + signal.Notify(injectAbort, os.Interrupt, syscall.SIGTERM) //Fetching all the ENV passed for the helper pod log.Info("[PreReq]: Getting the ENV variables") diff --git a/chaoslib/litmus/pod-dns-chaos/lib/pod-dns-chaos.go b/chaoslib/litmus/pod-dns-chaos/lib/pod-dns-chaos.go index 2e2d8ebb8..bb7b7c69f 100644 --- a/chaoslib/litmus/pod-dns-chaos/lib/pod-dns-chaos.go +++ b/chaoslib/litmus/pod-dns-chaos/lib/pod-dns-chaos.go @@ -125,7 +125,7 @@ func injectChaosInSerialMode(experimentsDetails *experimentTypes.ExperimentDetai //Deleting all the helper pod for pod-dns chaos log.Info("[Cleanup]: Deleting the helper pod") - if err = common.DeletePod(experimentsDetails.ExperimentName+"-"+runID, appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { + if err = common.DeletePod(experimentsDetails.ExperimentName+"-helper-"+runID, appLabel, experimentsDetails.ChaosNamespace, chaosDetails.Timeout, chaosDetails.Delay, clients); err != nil { return errors.Errorf("Unable to delete the helper pods, err: %v", err) } } diff --git a/chaoslib/litmus/pod-memory-hog-exec/lib/pod-memory-hog-exec.go b/chaoslib/litmus/pod-memory-hog-exec/lib/pod-memory-hog-exec.go index 29d2777ad..44d249862 100644 --- a/chaoslib/litmus/pod-memory-hog-exec/lib/pod-memory-hog-exec.go +++ b/chaoslib/litmus/pod-memory-hog-exec/lib/pod-memory-hog-exec.go @@ -23,6 +23,8 @@ import ( corev1 "k8s.io/api/core/v1" ) +var inject chan os.Signal + // stressMemory Uses the REST API to exec into the target container of the target pod // The function will be constantly increasing the Memory utilisation until it reaches the maximum available or allowed number. // Using the TOTAL_CHAOS_DURATION we will need to specify for how long this experiment will last @@ -87,8 +89,6 @@ func experimentMemory(experimentsDetails *experimentTypes.ExperimentDetails, cli // injectChaosInSerialMode stressed the memory of all target application serially (one by one) func injectChaosInSerialMode(experimentsDetails *experimentTypes.ExperimentDetails, targetPodList corev1.PodList, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { - // creating err channel to recieve the error from the go routine - stressErr := make(chan error) // run the probes during chaos if len(resultDetails.ProbeDetails) != 0 { @@ -97,67 +97,77 @@ func injectChaosInSerialMode(experimentsDetai } + // signChan channel is used to transmit signal notifications. + signChan := make(chan os.Signal, 1) + // Catch and relay certain signal(s) to signChan channel.
+ signal.Notify(signChan, os.Interrupt, syscall.SIGTERM) + var endTime <-chan time.Time timeDelay := time.Duration(experimentsDetails.ChaosDuration) * time.Second - for _, pod := range targetPodList.Items { + select { + case <-inject: + // stopping the chaos execution, if an abort signal is received + time.Sleep(10 * time.Second) + os.Exit(0) + default: + for _, pod := range targetPodList.Items { - if experimentsDetails.EngineName != "" { - msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" - types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) - events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") - } + // creating err channel to receive the error from the go routine + stressErr := make(chan error) + + if experimentsDetails.EngineName != "" { + msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" + types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + } - log.InfoWithValues("[Chaos]: The Target application details", logrus.Fields{ - "Target Container": experimentsDetails.TargetContainer, - "Target Pod": pod.Name, - "Memory Consumption(MB)": experimentsDetails.MemoryConsumption, - }) - go stressMemory(strconv.Itoa(experimentsDetails.MemoryConsumption), experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, clients, stressErr) - - common.SetTargets(pod.Name, "injected", "pod", chaosDetails) - - log.Infof("[Chaos]:Waiting for: %vs", experimentsDetails.ChaosDuration) - - // signChan channel is used to transmit signal notifications. - signChan := make(chan os.Signal, 1) - // Catch and relay certain signal(s) to signChan channel.
- signal.Notify(signChan, os.Interrupt, syscall.SIGTERM) - - loop: - for { - endTime = time.After(timeDelay) - select { - case err := <-stressErr: - // skipping the execution, if recieved any error other than 137, while executing stress command and marked result as fail - // it will ignore the error code 137(oom kill), it will skip further execution and marked the result as pass - // oom kill occurs if memory to be stressed exceed than the resource limit for the target container - if err != nil { - if strings.Contains(err.Error(), "137") { - log.Warn("Chaos process OOM killed") - return nil + log.InfoWithValues("[Chaos]: The Target application details", logrus.Fields{ + "Target Container": experimentsDetails.TargetContainer, + "Target Pod": pod.Name, + "Memory Consumption(MB)": experimentsDetails.MemoryConsumption, + }) + go stressMemory(strconv.Itoa(experimentsDetails.MemoryConsumption), experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, clients, stressErr) + + common.SetTargets(pod.Name, "injected", "pod", chaosDetails) + + log.Infof("[Chaos]:Waiting for: %vs", experimentsDetails.ChaosDuration) + + loop: + for { + endTime = time.After(timeDelay) + select { + case err := <-stressErr: + // skipping the execution, if we received any error other than 137 while executing the stress command, and marking the result as failed + // it will ignore the error code 137 (oom kill), skip further execution, and mark the result as passed + // oom kill occurs if the memory to be stressed exceeds the resource limit for the target container + if err != nil { + if strings.Contains(err.Error(), "137") { + log.Warn("Chaos process OOM killed") + return nil + } + return err } - return err - } - case <-signChan: - log.Info("[Chaos]: Revert Started") - if err := killStressMemorySerial(experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients, chaosDetails); err != nil { - log.Errorf("Error in Kill stress after abortion, err: %v", err) + case <-signChan: + log.Info("[Chaos]: Revert Started") + if err := killStressMemorySerial(experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients, chaosDetails); err != nil { + log.Errorf("Error in Kill stress after abortion, err: %v", err) + } + // updating the chaosresult after stopped + failStep := "Chaos injection stopped!" + types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) + result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") + log.Info("[Chaos]: Revert Completed") + os.Exit(1) + case <-endTime: + log.Infof("[Chaos]: Time is up for experiment: %v", experimentsDetails.ExperimentName) + endTime = nil + break loop } - // updating the chaosresult after stopped - failStep := "Chaos injection stopped!"
- types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) - result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") - log.Info("[Chaos]: Revert Completed") - os.Exit(1) - case <-endTime: - log.Infof("[Chaos]: Time is up for experiment: %v", experimentsDetails.ExperimentName) - endTime = nil - break loop - } - } - if err := killStressMemorySerial(experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients, chaosDetails); err != nil { - return err + if err := killStressMemorySerial(experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, experimentsDetails.ChaosKillCmd, clients, chaosDetails); err != nil { + return err + } } } return nil @@ -175,32 +185,40 @@ func injectChaosInParallelMode(experimentsDetails *experimentTypes.ExperimentDet } } + // signChan channel is used to transmit signal notifications. + signChan := make(chan os.Signal, 1) + // Catch and relay certain signal(s) to signChan channel. + signal.Notify(signChan, os.Interrupt, syscall.SIGTERM) + var endTime <-chan time.Time timeDelay := time.Duration(experimentsDetails.ChaosDuration) * time.Second - for _, pod := range targetPodList.Items { + select { + case <-inject: + // stopping the chaos execution, if an abort signal is received + time.Sleep(10 * time.Second) + os.Exit(0) + default: + for _, pod := range targetPodList.Items { - if experimentsDetails.EngineName != "" { - msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" - types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) - events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") - } + if experimentsDetails.EngineName != "" { + msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" + types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) + events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + } - log.InfoWithValues("[Chaos]: The Target application details", logrus.Fields{ - "Target Container": experimentsDetails.TargetContainer, - "Target Pod": pod.Name, - "Memory Consumption(MB)": experimentsDetails.MemoryConsumption, - }) + log.InfoWithValues("[Chaos]: The Target application details", logrus.Fields{ + "Target Container": experimentsDetails.TargetContainer, + "Target Pod": pod.Name, + "Memory Consumption(MB)": experimentsDetails.MemoryConsumption, + }) - go stressMemory(strconv.Itoa(experimentsDetails.MemoryConsumption), experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, clients, stressErr) + go stressMemory(strconv.Itoa(experimentsDetails.MemoryConsumption), experimentsDetails.TargetContainer, pod.Name, experimentsDetails.AppNS, clients, stressErr) + } } log.Infof("[Chaos]:Waiting for: %vs", experimentsDetails.ChaosDuration) - // signChan channel is used to transmit signal notifications. - signChan := make(chan os.Signal, 1) - // Catch and relay certain signal(s) to signChan channel. - signal.Notify(signChan, os.Interrupt, syscall.SIGTERM) loop: for { endTime = time.After(timeDelay) @@ -238,6 +256,11 @@ loop: //PrepareMemoryExecStress contains the chaos preparation and injection steps func PrepareMemoryExecStress(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { + // inject channel is used to transmit signal notifications.
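(An aside on the abort path shown in these hunks: before the process exits, the verdict is recorded as Stopped and the ChaosResult is persisted, so the CR reflects the abort. A rough sketch with simplified stand-in types; setResultAfterCompletion and persistResult only approximate types.SetResultAfterCompletion and result.ChaosResult.)

package main

import (
	"fmt"
	"os"
)

// ResultDetails is a simplified stand-in for the litmus-go result type.
type ResultDetails struct {
	Phase, Verdict, FailStep string
}

// setResultAfterCompletion records why the run stopped before the process
// exits, mirroring the intent of types.SetResultAfterCompletion.
func setResultAfterCompletion(r *ResultDetails, phase, verdict, failStep string) {
	r.Phase, r.Verdict, r.FailStep = phase, verdict, failStep
}

// persistResult stands in for result.ChaosResult(..., "EOT"), which
// patches the ChaosResult custom resource.
func persistResult(r *ResultDetails) {
	fmt.Printf("persisting result: %+v\n", *r)
}

func main() {
	r := &ResultDetails{}
	setResultAfterCompletion(r, "Stopped", "Stopped", "Chaos injection stopped!")
	persistResult(r)
	os.Exit(1)
}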
+ inject = make(chan os.Signal, 1) + // Catch and relay certain signal(s) to inject channel. + signal.Notify(inject, os.Interrupt, syscall.SIGTERM) + //Waiting for the ramp time before chaos injection if experimentsDetails.RampTime != 0 { log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) diff --git a/chaoslib/litmus/stress-chaos/helper/stress-helper.go b/chaoslib/litmus/stress-chaos/helper/stress-helper.go index 569c4572d..4715fad8e 100644 --- a/chaoslib/litmus/stress-chaos/helper/stress-helper.go +++ b/chaoslib/litmus/stress-chaos/helper/stress-helper.go @@ -194,7 +194,7 @@ func prepareStressChaos(experimentsDetails *experimentTypes.ExperimentDetails, c return errors.Errorf("process stopped with SIGTERM signal") } } - return errors.Errorf("error process exited accidentally", err) + return errors.Errorf("process exited before the actual cleanup, err: %v", err) } log.Info("[Info]: Chaos injection completed") terminateProcess(cmd.Process.Pid) @@ -213,7 +213,7 @@ func terminateProcess(pid int) error { return errors.Errorf("unreachable path, err: %v", err) } if err = process.Signal(syscall.SIGTERM); err != nil && err.Error() != ProcessAlreadyFinished { - return errors.Errorf("error while killing process", err) + return errors.Errorf("error while killing process, err: %v", err) } log.Info("[Info]: Stress process removed sucessfully") return nil diff --git a/contribute/developer-guide/README.md b/contribute/developer-guide/README.md index e3f3167e2..88a7c5f83 100644 --- a/contribute/developer-guide/README.md +++ b/contribute/developer-guide/README.md @@ -13,7 +13,7 @@ The artifacts associated with a chaos-experiment are summarized below: - Submitted in litmuschaos/chaos-charts repository, under the *chaos-category* folder - Experiment custom resource (CR) (holds experiment-specific chaos parameters & experiment entrypoint) - - Experiment ChartServiceVersion (holds experiment metadata that will be rendered on [charthub](hub.litmuschaos.io)) + - Experiment ChartServiceVersion (holds experiment metadata that will be rendered on [charthub](https://hub.litmuschaos.io/)) - Experiment RBAC (holds experiment-specific ServiceAccount, Role and RoleBinding) - Experiment Engine (holds experiment-specific chaosengine) diff --git a/contribute/developer-guide/templates/experiment.tmpl b/contribute/developer-guide/templates/experiment.tmpl index 0d4f7b356..0817c6b61 100644 --- a/contribute/developer-guide/templates/experiment.tmpl +++ b/contribute/developer-guide/templates/experiment.tmpl @@ -76,6 +76,8 @@ func Experiment(clients clients.ClientSets){ if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -143,6 +145,8 @@ func Experiment(clients clients.ClientSets){ if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != 
nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } diff --git a/experiments/cassandra/pod-delete/experiment/pod-delete.go b/experiments/cassandra/pod-delete/experiment/pod-delete.go index 0cc33780a..e794dc0fb 100644 --- a/experiments/cassandra/pod-delete/experiment/pod-delete.go +++ b/experiments/cassandra/pod-delete/experiment/pod-delete.go @@ -80,6 +80,8 @@ func CasssandraPodDelete(clients clients.ClientSets) { if err = status.AUTStatusCheck(experimentsDetails.ChaoslibDetail.AppNS, experimentsDetails.ChaoslibDetail.AppLabel, experimentsDetails.ChaoslibDetail.TargetContainer, experimentsDetails.ChaoslibDetail.Timeout, experimentsDetails.ChaoslibDetail.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -117,7 +119,7 @@ func CasssandraPodDelete(clients clients.ClientSets) { } // Cassandra liveness check - if experimentsDetails.CassandraLivenessCheck == "enabled" { + if experimentsDetails.CassandraLivenessCheck == "enable" { ResourceVersionBefore, err = cassandra.LivenessCheck(&experimentsDetails, clients) if err != nil { log.Errorf("[Liveness]: Cassandra liveness check failed, err: %v", err) @@ -127,7 +129,7 @@ func CasssandraPodDelete(clients clients.ClientSets) { } log.Info("[Confirmation]: The cassandra application liveness pod created successfully") } else { - log.Warn("[Liveness]: Cassandra Liveness check skipped as it was not enabled") + log.Warn("[Liveness]: Cassandra Liveness check skipped as it is not enabled") } // Including the litmus lib for cassandra-pod-delete @@ -154,6 +156,8 @@ func CasssandraPodDelete(clients clients.ClientSets) { if err = status.AUTStatusCheck(experimentsDetails.ChaoslibDetail.AppNS, experimentsDetails.ChaoslibDetail.AppLabel, experimentsDetails.ChaoslibDetail.TargetContainer, experimentsDetails.ChaoslibDetail.Timeout, experimentsDetails.ChaoslibDetail.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -192,7 +196,7 @@ func CasssandraPodDelete(clients clients.ClientSets) { // Cassandra statefulset liveness check (post-chaos) log.Info("[Status]: Confirm that the cassandra liveness pod is running(post-chaos)") // Checking the running status of cassandra liveness - if experimentsDetails.CassandraLivenessCheck == "enabled" { + if experimentsDetails.CassandraLivenessCheck == "enable" { if err = 
status.CheckApplicationStatus(experimentsDetails.ChaoslibDetail.AppNS, "name=cassandra-liveness-deploy-"+experimentsDetails.RunID, experimentsDetails.ChaoslibDetail.Timeout, experimentsDetails.ChaoslibDetail.Delay, clients); err != nil { log.Errorf("Liveness status check failed, err: %v", err) failStep := "failed while checking the status of liveness pod" diff --git a/experiments/generic/container-kill/experiment/container-kill.go b/experiments/generic/container-kill/experiment/container-kill.go index a682d3aaa..9057851be 100644 --- a/experiments/generic/container-kill/experiment/container-kill.go +++ b/experiments/generic/container-kill/experiment/container-kill.go @@ -75,6 +75,8 @@ func ContainerKill(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -133,6 +135,8 @@ func ContainerKill(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } diff --git a/experiments/generic/disk-fill/experiment/disk-fill.go b/experiments/generic/disk-fill/experiment/disk-fill.go index e3e1c0905..9f23f1f05 100644 --- a/experiments/generic/disk-fill/experiment/disk-fill.go +++ b/experiments/generic/disk-fill/experiment/disk-fill.go @@ -74,6 +74,8 @@ func DiskFill(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -136,6 +138,8 @@ func DiskFill(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + 
types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } diff --git a/experiments/generic/docker-service-kill/experiment/docker-service-kill.go b/experiments/generic/docker-service-kill/experiment/docker-service-kill.go index 3c522afd4..383a4c8b7 100644 --- a/experiments/generic/docker-service-kill/experiment/docker-service-kill.go +++ b/experiments/generic/docker-service-kill/experiment/docker-service-kill.go @@ -61,11 +61,11 @@ func DockerServiceKill(clients clients.ClientSets) { //DISPLAY THE APP INFORMATION log.InfoWithValues("[Info]: The application information is as follows", logrus.Fields{ - "App Namespace": experimentsDetails.AppNS, - "App Label": experimentsDetails.AppLabel, - "Node Label": experimentsDetails.NodeLabel, - "Target Node": experimentsDetails.TargetNode, - "Ramp Time": experimentsDetails.RampTime, + "App Namespace": experimentsDetails.AppNS, + "App Label": experimentsDetails.AppLabel, + "Node Label": experimentsDetails.NodeLabel, + "Target Node": experimentsDetails.TargetNode, + "Ramp Time": experimentsDetails.RampTime, }) // Calling AbortWatcher go routine, it will continuously watch for the abort signal and generate the required events and result @@ -96,13 +96,15 @@ func DockerServiceKill(clients clients.ClientSets) { if err := status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Target nodes are not in the ready state, err: %v", err) failStep := "Checking the status of nodes" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the pre-chaos check if len(resultDetails.ProbeDetails) != 0 { @@ -110,13 +112,13 @@ func DockerServiceKill(clients clients.ClientSets) { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil { log.Errorf("Probe Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating the events for the pre-chaos check types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails) @@ -162,22 +164,30 @@ func DockerServiceKill(clients clients.ClientSets) { } } + // Checking the status of target nodes + log.Info("[Status]: Getting the status of target nodes") + if err := status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + log.Warnf("Target nodes are not in the ready state, you may 
need to manually recover the node, err: %v", err) + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the post-chaos check if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails); err != nil { log.Errorf("Probes Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating post chaos event diff --git a/experiments/generic/kubelet-service-kill/experiment/kubelet-service-kill.go b/experiments/generic/kubelet-service-kill/experiment/kubelet-service-kill.go index 9e8e9534d..653f98a2e 100644 --- a/experiments/generic/kubelet-service-kill/experiment/kubelet-service-kill.go +++ b/experiments/generic/kubelet-service-kill/experiment/kubelet-service-kill.go @@ -96,13 +96,15 @@ func KubeletServiceKill(clients clients.ClientSets) { if err := status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Target nodes are not in the ready state, err: %v", err) failStep := "Checking the status of nodes" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the pre-chaos check if len(resultDetails.ProbeDetails) != 0 { @@ -110,13 +112,13 @@ func KubeletServiceKill(clients clients.ClientSets) { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil { log.Errorf("Probe Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating the events for the pre-chaos check types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails) @@ -162,22 +164,30 @@ func KubeletServiceKill(clients clients.ClientSets) { } } + // Checking the status of target nodes + log.Info("[Status]: Getting the status of target nodes") + if err := status.CheckNodeStatus(experimentsDetails.TargetNode, 
experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the post-chaos check if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails); err != nil { log.Errorf("Probes Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating post chaos event diff --git a/experiments/generic/node-cpu-hog/experiment/node-cpu-hog.go b/experiments/generic/node-cpu-hog/experiment/node-cpu-hog.go index 5367518ca..927247c3c 100644 --- a/experiments/generic/node-cpu-hog/experiment/node-cpu-hog.go +++ b/experiments/generic/node-cpu-hog/experiment/node-cpu-hog.go @@ -97,13 +97,15 @@ func NodeCPUHog(clients clients.ClientSets) { if err := status.CheckNodeStatus(experimentsDetails.TargetNodes, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Target nodes are not in the ready state, err: %v", err) failStep := "Checking the status of nodes" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the pre-chaos check if len(resultDetails.ProbeDetails) != 0 { @@ -111,13 +113,13 @@ func NodeCPUHog(clients clients.ClientSets) { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil { log.Errorf("Probe Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating the events for the pre-chaos check types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails) @@ -162,23 +164,30 @@ func NodeCPUHog(clients clients.ClientSets) { return } } + // Checking the status of target nodes + log.Info("[Status]: Getting the status of target nodes") + if err := 
status.CheckNodeStatus(experimentsDetails.TargetNodes, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the post-chaos check if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails); err != nil { log.Errorf("Probes Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating post chaos event diff --git a/experiments/generic/node-drain/experiment/node-drain.go b/experiments/generic/node-drain/experiment/node-drain.go index b3295ea67..1d5dc2b63 100644 --- a/experiments/generic/node-drain/experiment/node-drain.go +++ b/experiments/generic/node-drain/experiment/node-drain.go @@ -96,13 +96,15 @@ func NodeDrain(clients clients.ClientSets) { if err := status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Target nodes are not in the ready state, err: %v", err) failStep := "Checking the status of nodes" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the pre-chaos check if len(resultDetails.ProbeDetails) != 0 { @@ -110,13 +112,13 @@ func NodeDrain(clients clients.ClientSets) { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil { log.Errorf("Probe Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating the events for the pre-chaos check types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails) @@ -162,22 +164,30 @@ func NodeDrain(clients clients.ClientSets) { } } + // Checking the status of target nodes + log.Info("[Status]: Getting the status of target 
nodes") + if err := status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the post-chaos check if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails); err != nil { log.Errorf("Probes Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating post chaos event diff --git a/experiments/generic/node-io-stress/experiment/node-io-stress.go b/experiments/generic/node-io-stress/experiment/node-io-stress.go index 9c36052c5..3a60ada1d 100644 --- a/experiments/generic/node-io-stress/experiment/node-io-stress.go +++ b/experiments/generic/node-io-stress/experiment/node-io-stress.go @@ -99,13 +99,15 @@ func NodeIOStress(clients clients.ClientSets) { if err := status.CheckNodeStatus(experimentsDetails.TargetNodes, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Target nodes are not in the ready state, err: %v", err) failStep := "Checking the status of nodes" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the pre-chaos check if len(resultDetails.ProbeDetails) != 0 { @@ -113,13 +115,13 @@ func NodeIOStress(clients clients.ClientSets) { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil { log.Errorf("Probe Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating the events for the pre-chaos check types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails) @@ -165,22 +167,30 @@ func NodeIOStress(clients clients.ClientSets) { } } + // Checking the status of 
target nodes + log.Info("[Status]: Getting the status of target nodes") + if err := status.CheckNodeStatus(experimentsDetails.TargetNodes, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the post-chaos check if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails); err != nil { log.Errorf("Probes Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating post chaos event diff --git a/experiments/generic/node-memory-hog/experiment/node-memory-hog.go b/experiments/generic/node-memory-hog/experiment/node-memory-hog.go index 33f2dfe97..21382f72b 100644 --- a/experiments/generic/node-memory-hog/experiment/node-memory-hog.go +++ b/experiments/generic/node-memory-hog/experiment/node-memory-hog.go @@ -83,18 +83,31 @@ func NodeMemoryHog(clients clients.ClientSets) { return } + //PRE-CHAOS AUXILIARY APPLICATION STATUS CHECK + if experimentsDetails.AuxiliaryAppInfo != "" { + log.Info("[Status]: Verify that the Auxiliary Applications are running (pre-chaos)") + if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + log.Errorf("Auxiliary Application status check failed, err: %v", err) + failStep := "Verify that the Auxiliary Applications are running (pre-chaos)" + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + } + // Checking the status of target nodes log.Info("[Status]: Getting the status of target nodes") if err := status.CheckNodeStatus(experimentsDetails.TargetNodes, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Target nodes are not in the ready state, err: %v", err) failStep := "Checking the status of nodes" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the pre-chaos check if len(resultDetails.ProbeDetails) != 0 { @@ -102,13 +115,13 @@ func NodeMemoryHog(clients clients.ClientSets) { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != 
nil { log.Errorf("Probe Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating the events for the pre-chaos check types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails) @@ -143,22 +156,41 @@ func NodeMemoryHog(clients clients.ClientSets) { return } + //POST-CHAOS AUXILIARY APPLICATION STATUS CHECK + if experimentsDetails.AuxiliaryAppInfo != "" { + log.Info("[Status]: Verify that the Auxiliary Applications are running (post-chaos)") + if err := status.CheckAuxiliaryApplicationStatus(experimentsDetails.AuxiliaryAppInfo, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + log.Errorf("Auxiliary Application status check failed, err: %v", err) + failStep := "Verify that the Auxiliary Applications are running (post-chaos)" + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + } + + // Checking the status of target nodes + log.Info("[Status]: Getting the status of target nodes") + if err := status.CheckNodeStatus(experimentsDetails.TargetNodes, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the post-chaos check if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails); err != nil { log.Errorf("Probes Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating post chaos event diff --git a/experiments/generic/node-restart/experiment/node-restart.go b/experiments/generic/node-restart/experiment/node-restart.go index 24ffc1a95..76e207b3e 100644 --- a/experiments/generic/node-restart/experiment/node-restart.go +++ b/experiments/generic/node-restart/experiment/node-restart.go @@ -87,9 +87,20 @@ func NodeRestart(clients clients.ClientSets) { } } + // Checking the status of target nodes + log.Info("[Status]: Getting the status of target nodes") + if err := status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + log.Errorf("Target nodes are not in the ready state, err: 
%v", err) + failStep := "Checking the status of nodes" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the pre-chaos check if len(resultDetails.ProbeDetails) != 0 { @@ -97,13 +108,13 @@ func NodeRestart(clients clients.ClientSets) { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil { log.Errorf("Probe Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating the events for the pre-chaos check types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails) @@ -149,22 +160,30 @@ func NodeRestart(clients clients.ClientSets) { } } + // Checking the status of target nodes + log.Info("[Status]: Getting the status of target nodes") + if err := status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the post-chaos check if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails); err != nil { log.Errorf("Probes Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating post chaos event diff --git a/experiments/generic/node-taint/experiment/node-taint.go b/experiments/generic/node-taint/experiment/node-taint.go index 7cb7504bf..3d5f0b711 100644 --- a/experiments/generic/node-taint/experiment/node-taint.go +++ b/experiments/generic/node-taint/experiment/node-taint.go @@ -96,13 +96,14 @@ func NodeTaint(clients clients.ClientSets) { if err := status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { log.Errorf("Target nodes are 
not in the ready state, err: %v", err) failStep := "Checking the status of nodes" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the pre-chaos check if len(resultDetails.ProbeDetails) != 0 { @@ -110,13 +111,13 @@ func NodeTaint(clients clients.ClientSets) { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil { log.Errorf("Probe Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating the events for the pre-chaos check types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails) @@ -162,22 +163,30 @@ func NodeTaint(clients clients.ClientSets) { } } + // Checking the status of target nodes + log.Info("[Status]: Getting the status of target nodes") + if err := status.CheckNodeStatus(experimentsDetails.TargetNode, experimentsDetails.Timeout, experimentsDetails.Delay, clients); err != nil { + log.Warnf("Target nodes are not in the ready state, you may need to manually recover the node, err: %v", err) + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "NUT: Not Ready", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test - msg := "AUT: Running" + msg := "NUT: Ready" // run the probes in the post-chaos check if len(resultDetails.ProbeDetails) != 0 { if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails); err != nil { log.Errorf("Probes Failed, err: %v", err) failStep := "Failed while running probes" - msg := "AUT: Running, Probes: Unsuccessful" + msg := "NUT: Ready, Probes: Unsuccessful" types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } - msg = "AUT: Running, Probes: Successful" + msg = "NUT: Ready, Probes: Successful" } // generating post chaos event diff --git a/experiments/generic/pod-autoscaler/experiment/pod-autoscaler.go b/experiments/generic/pod-autoscaler/experiment/pod-autoscaler.go index a1c44ba06..afaa56cb5 100644 --- a/experiments/generic/pod-autoscaler/experiment/pod-autoscaler.go +++ b/experiments/generic/pod-autoscaler/experiment/pod-autoscaler.go @@ -75,10 +75,12 @@ func PodAutoscaler(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, 
experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" - types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) - result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } + if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test msg := "AUT: Running" @@ -108,15 +110,13 @@ func PodAutoscaler(clients clients.ClientSets) { if err := litmusLIB.PreparePodAutoscaler(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { log.Errorf("Chaos injection failed, err: %v", err) failStep := "failed in chaos injection phase" - types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) - result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } default: log.Error("[Invalid]: Please Provide the correct LIB") failStep := "no match found for specified lib" - types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) - result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -128,10 +128,12 @@ func PodAutoscaler(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" - types.SetResultAfterCompletion(&resultDetails, "Fail", "Completed", failStep) - result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT") + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } + if experimentsDetails.EngineName != "" { // marking AUT as running, as we already checked the status of application under test msg := "AUT: Running" diff --git a/experiments/generic/pod-cpu-hog-exec/experiment/pod-cpu-hog-exec.go b/experiments/generic/pod-cpu-hog-exec/experiment/pod-cpu-hog-exec.go index e2c8e022a..cf4d667ad 100644 --- a/experiments/generic/pod-cpu-hog-exec/experiment/pod-cpu-hog-exec.go +++ b/experiments/generic/pod-cpu-hog-exec/experiment/pod-cpu-hog-exec.go @@ -75,6 +75,8 @@ func PodCPUHogExec(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running 
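pod-autoscaler was the last experiment in this set still calling types.SetResultAfterCompletion plus result.ChaosResult by hand on each failure path; all three call sites collapse onto result.RecordAfterFailure, which the other experiments already use. The shape of that refactor, sketched with hypothetical local types:

package main

import "fmt"

type resultDetails struct {
    Verdict, Phase, FailStep string
}

// recordAfterFailure stands in for result.RecordAfterFailure: a single call
// that sets the verdict and completes the ChaosResult, replacing the
// SetResultAfterCompletion + ChaosResult pair repeated on every error path.
func recordAfterFailure(r *resultDetails, failStep string) {
    r.Verdict, r.Phase, r.FailStep = "Fail", "Completed", failStep
    fmt.Printf("chaosresult updated: %+v\n", *r)
}

func main() {
    var r resultDetails
    recordAfterFailure(&r, "failed in chaos injection phase")
}

Centralising the verdict in one helper keeps every abort path consistent, so the event-emission lines become the only per-site difference.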
(pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -126,6 +128,8 @@ func PodCPUHogExec(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } diff --git a/experiments/generic/pod-cpu-hog/experiment/pod-cpu-hog.go b/experiments/generic/pod-cpu-hog/experiment/pod-cpu-hog.go index 128ffcdf5..c899882d2 100644 --- a/experiments/generic/pod-cpu-hog/experiment/pod-cpu-hog.go +++ b/experiments/generic/pod-cpu-hog/experiment/pod-cpu-hog.go @@ -76,6 +76,8 @@ func PodCPUHog(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -134,6 +136,8 @@ func PodCPUHog(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } diff --git a/experiments/generic/pod-delete/experiment/pod-delete.go b/experiments/generic/pod-delete/experiment/pod-delete.go index f55f18127..e2c999db2 100644 --- a/experiments/generic/pod-delete/experiment/pod-delete.go +++ b/experiments/generic/pod-delete/experiment/pod-delete.go @@ -75,6 +75,8 @@ func PodDelete(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", 
"Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -133,6 +135,8 @@ func PodDelete(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } diff --git a/experiments/generic/pod-dns-error/experiment/pod-dns-error.go b/experiments/generic/pod-dns-error/experiment/pod-dns-error.go index 9ac10e56a..04356ac54 100644 --- a/experiments/generic/pod-dns-error/experiment/pod-dns-error.go +++ b/experiments/generic/pod-dns-error/experiment/pod-dns-error.go @@ -74,6 +74,8 @@ func PodDNSError(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -125,6 +127,8 @@ func PodDNSError(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } diff --git a/experiments/generic/pod-dns-spoof/experiment/pod-dns-spoof.go b/experiments/generic/pod-dns-spoof/experiment/pod-dns-spoof.go index 02baa7d0c..8375b0198 100644 --- a/experiments/generic/pod-dns-spoof/experiment/pod-dns-spoof.go +++ b/experiments/generic/pod-dns-spoof/experiment/pod-dns-spoof.go @@ -75,6 +75,8 @@ func PodDNSSpoof(clients clients.ClientSets) { if err = status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + 
events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -127,6 +129,8 @@ func PodDNSSpoof(clients clients.ClientSets) { if err = status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } diff --git a/experiments/generic/pod-io-stress/experiment/pod-io-stress.go b/experiments/generic/pod-io-stress/experiment/pod-io-stress.go index 6801ddaad..e0cb00be4 100644 --- a/experiments/generic/pod-io-stress/experiment/pod-io-stress.go +++ b/experiments/generic/pod-io-stress/experiment/pod-io-stress.go @@ -78,6 +78,8 @@ func PodIOStress(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -136,6 +138,8 @@ func PodIOStress(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } diff --git a/experiments/generic/pod-memory-hog-exec/experiment/pod-memory-hog-exec.go b/experiments/generic/pod-memory-hog-exec/experiment/pod-memory-hog-exec.go index c1631ec44..671b857d6 100644 --- a/experiments/generic/pod-memory-hog-exec/experiment/pod-memory-hog-exec.go +++ b/experiments/generic/pod-memory-hog-exec/experiment/pod-memory-hog-exec.go @@ -76,6 +76,8 @@ func PodMemoryHogExec(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed,, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + 
events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -127,6 +129,8 @@ func PodMemoryHogExec(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } diff --git a/experiments/generic/pod-memory-hog/experiment/pod-memory-hog.go b/experiments/generic/pod-memory-hog/experiment/pod-memory-hog.go index 8e17ae92f..8cb2ca6f9 100644 --- a/experiments/generic/pod-memory-hog/experiment/pod-memory-hog.go +++ b/experiments/generic/pod-memory-hog/experiment/pod-memory-hog.go @@ -77,6 +77,8 @@ func PodMemoryHog(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed,, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -135,6 +137,8 @@ func PodMemoryHog(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } diff --git a/experiments/generic/pod-network-corruption/experiment/pod-network-corruption.go b/experiments/generic/pod-network-corruption/experiment/pod-network-corruption.go index 3192ff7f7..a884420a0 100644 --- a/experiments/generic/pod-network-corruption/experiment/pod-network-corruption.go +++ b/experiments/generic/pod-network-corruption/experiment/pod-network-corruption.go @@ -75,6 +75,8 @@ func PodNetworkCorruption(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not 
Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -133,6 +135,8 @@ func PodNetworkCorruption(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } diff --git a/experiments/generic/pod-network-duplication/experiment/pod-network-duplication.go b/experiments/generic/pod-network-duplication/experiment/pod-network-duplication.go index f3ca567ba..6c523d99f 100644 --- a/experiments/generic/pod-network-duplication/experiment/pod-network-duplication.go +++ b/experiments/generic/pod-network-duplication/experiment/pod-network-duplication.go @@ -75,6 +75,8 @@ func PodNetworkDuplication(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -133,6 +135,8 @@ func PodNetworkDuplication(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } diff --git a/experiments/generic/pod-network-latency/experiment/pod-network-latency.go b/experiments/generic/pod-network-latency/experiment/pod-network-latency.go index 7f39db6f6..6613fcea5 100644 --- a/experiments/generic/pod-network-latency/experiment/pod-network-latency.go +++ b/experiments/generic/pod-network-latency/experiment/pod-network-latency.go @@ -75,6 +75,8 @@ func PodNetworkLatency(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) 
is running (pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -133,6 +135,8 @@ func PodNetworkLatency(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } diff --git a/experiments/generic/pod-network-loss/experiment/pod-network-loss.go b/experiments/generic/pod-network-loss/experiment/pod-network-loss.go index 197e52ecc..e59dab1b2 100644 --- a/experiments/generic/pod-network-loss/experiment/pod-network-loss.go +++ b/experiments/generic/pod-network-loss/experiment/pod-network-loss.go @@ -75,6 +75,8 @@ func PodNetworkLoss(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Errorf("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -133,6 +135,8 @@ func PodNetworkLoss(clients clients.ClientSets) { if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { log.Infof("Application status check failed, err: %v", err) failStep := "Verify that the AUT (Application Under Test) is running (post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } diff --git a/experiments/kafka/kafka-broker-pod-failure/experiment/kafka-broker-pod-failure.go b/experiments/kafka/kafka-broker-pod-failure/experiment/kafka-broker-pod-failure.go index 0bd9eb33d..345c3772a 100644 --- a/experiments/kafka/kafka-broker-pod-failure/experiment/kafka-broker-pod-failure.go +++ b/experiments/kafka/kafka-broker-pod-failure/experiment/kafka-broker-pod-failure.go @@ -74,6 +74,8 @@ func KafkaBrokerPodFailure(clients clients.ClientSets) { if err := kafka.ClusterHealthCheck(&experimentsDetails, clients); err != nil { log.Errorf("Cluster health check failed, err: %v", err) failStep := "Verify that the Kafka cluster is healthy(pre-chaos)" + types.SetEngineEventAttributes(&eventsDetails, 
types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -102,7 +104,7 @@ func KafkaBrokerPodFailure(clients clients.ClientSets) { // PRE-CHAOS KAFKA APPLICATION LIVENESS CHECK switch strings.ToLower(experimentsDetails.KafkaLivenessStream) { - case "enabled": + case "enable": livenessTopicLeader, err := kafka.LivenessStream(&experimentsDetails, clients) if err != nil { log.Errorf("Liveness check failed, err: %v", err) @@ -144,6 +146,8 @@ func KafkaBrokerPodFailure(clients clients.ClientSets) { if err := kafka.ClusterHealthCheck(&experimentsDetails, clients); err != nil { log.Errorf("Cluster health check failed, err: %v", err) failStep := "Verify that the Kafka cluster is healthy(post-chaos)" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) return } @@ -173,7 +177,7 @@ func KafkaBrokerPodFailure(clients clients.ClientSets) { // Liveness Status Check (post-chaos) and cleanup switch strings.ToLower(experimentsDetails.KafkaLivenessStream) { - case "enabled": + case "enable": log.Info("[Status]: Verify that the Kafka liveness pod is running(post-chaos)") if err := status.CheckApplicationStatus(experimentsDetails.ChaoslibDetail.AppNS, "name=kafka-liveness-"+experimentsDetails.RunID, experimentsDetails.ChaoslibDetail.Timeout, experimentsDetails.ChaoslibDetail.Delay, clients); err != nil { log.Errorf("Application liveness status check failed, err: %v", err) diff --git a/experiments/kafka/kafka-broker-pod-failure/test/test.yml b/experiments/kafka/kafka-broker-pod-failure/test/test.yml index 0d6efc55a..d75455336 100644 --- a/experiments/kafka/kafka-broker-pod-failure/test/test.yml +++ b/experiments/kafka/kafka-broker-pod-failure/test/test.yml @@ -25,10 +25,10 @@ spec: value: 'statefulset' - name: KAFKA_LIVENESS_STREAM - value: 'enabled' + value: 'enable' - name: KAFKA_LIVENESS_IMAGE - value: 'litmuschaos/kafka-client:ci' + value: 'litmuschaos/kafka-client:latest' # set to 'enabled' if you have auth set up - name: KAFKA_SASL_AUTH diff --git a/go.mod b/go.mod index c2fe4f689..8ec6520a6 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/litmuschaos/litmus-go go 1.16 require ( - github.com/Azure/azure-sdk-for-go v35.0.0+incompatible + github.com/Azure/azure-sdk-for-go v56.1.0+incompatible github.com/Azure/go-autorest/autorest v0.11.17 github.com/Azure/go-autorest/autorest/azure/auth v0.5.7 github.com/aws/aws-sdk-go v1.38.59 diff --git a/go.sum b/go.sum index 8ded42d12..0e9af507b 100644 --- a/go.sum +++ b/go.sum @@ -44,6 +44,8 @@ dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7 github.com/Azure/azure-sdk-for-go v32.5.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= github.com/Azure/azure-sdk-for-go v35.0.0+incompatible h1:PkmdmQUmeSdQQ5258f4SyCf2Zcz0w67qztEg37cOR7U= github.com/Azure/azure-sdk-for-go v35.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= +github.com/Azure/azure-sdk-for-go v56.1.0+incompatible h1:Ofcecdw3F1ZqnpDEZcLzH9Hq0P4Y5Si8+EioXJSamJs= +github.com/Azure/azure-sdk-for-go v56.1.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= github.com/Azure/go-ansiterm 
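The Kafka liveness toggle is renamed from "enabled" to "enable" in both switch arms and in the test manifest, and the manifest's liveness client image pin moves from the ci tag to latest. Because the value is compared after strings.ToLower, only the new spelling activates the stream; a sketch of the toggle as it now behaves (parseToggle is hypothetical):

package main

import (
    "fmt"
    "strings"
)

// parseToggle mirrors the switch in KafkaBrokerPodFailure: only the exact
// (case-insensitive) value "enable" turns the liveness stream on; anything
// else, including the old "enabled", falls through to disabled.
func parseToggle(v string) bool {
    switch strings.ToLower(v) {
    case "enable":
        return true
    default:
        return false
    }
}

func main() {
    fmt.Println(parseToggle("Enable"))  // true
    fmt.Println(parseToggle("enabled")) // false: old manifests need updating
}

Existing manifests that still say 'enabled' would silently skip the liveness stream, so this is a breaking default worth calling out in release notes.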
v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs= github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= diff --git a/pkg/cassandra/pod-delete/environment/environment.go b/pkg/cassandra/pod-delete/environment/environment.go index 7b3f1f9d5..99401080d 100644 --- a/pkg/cassandra/pod-delete/environment/environment.go +++ b/pkg/cassandra/pod-delete/environment/environment.go @@ -34,6 +34,7 @@ func GetENV(cassandraDetails *cassandraTypes.ExperimentDetails) { ChaoslibDetail.Delay, _ = strconv.Atoi(common.Getenv("STATUS_CHECK_DELAY", "2")) ChaoslibDetail.Timeout, _ = strconv.Atoi(common.Getenv("STATUS_CHECK_TIMEOUT", "180")) ChaoslibDetail.PodsAffectedPerc, _ = strconv.Atoi(common.Getenv("PODS_AFFECTED_PERC", "0")) + ChaoslibDetail.Sequence = common.Getenv("SEQUENCE", "parallel") cassandraDetails.ChaoslibDetail = &ChaoslibDetail cassandraDetails.CassandraServiceName = common.Getenv("CASSANDRA_SVC_NAME", "") cassandraDetails.KeySpaceReplicaFactor = common.Getenv("KEYSPACE_REPLICATION_FACTOR", "") @@ -65,5 +66,4 @@ func InitialiseChaosVariables(chaosDetails *types.ChaosDetails, cassandraDetails chaosDetails.AppDetail = appDetails chaosDetails.ProbeImagePullPolicy = cassandraDetails.ChaoslibDetail.LIBImagePullPolicy chaosDetails.Randomness, _ = strconv.ParseBool(common.Getenv("RANDOMNESS", "false")) - chaosDetails.ParentsResources = []string{} } diff --git a/pkg/cassandra/pod-delete/types/types.go b/pkg/cassandra/pod-delete/types/types.go index abca56b55..e59e02a1f 100644 --- a/pkg/cassandra/pod-delete/types/types.go +++ b/pkg/cassandra/pod-delete/types/types.go @@ -14,4 +14,5 @@ type ExperimentDetails struct { CassandraLivenessImage string CassandraLivenessCheck string RunID string + Sequence string } diff --git a/pkg/cloud/aws/ssm/ssm-operations.go b/pkg/cloud/aws/ssm/ssm-operations.go index ec3d84bff..97bbc474a 100644 --- a/pkg/cloud/aws/ssm/ssm-operations.go +++ b/pkg/cloud/aws/ssm/ssm-operations.go @@ -142,7 +142,7 @@ func CheckInstanceInformation(experimentsDetails *experimentTypes.ExperimentDeta } } if !isInstanceFound { - return errors.Errorf("error: the instance %v might not have suitable permission or iam attached to it. use \"aws ssm describe-instance-information\" to check the available instances") + return errors.Errorf("error: the instance %v might not have suitable permission or iam attached to it. 
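Two small Cassandra changes ride along here: the pod-delete SEQUENCE knob is now read into the Cassandra experiment (defaulting to parallel), and the explicit ParentsResources = []string{} initialisation is dropped, as it is in every environment file that follows. The drop is safe Go: a nil slice appends exactly like an empty one, so the list can be filled lazily once targets are discovered. A short demonstration:

package main

import "fmt"

func main() {
    var parents []string // nil; never needs the []string{} initialisation
    parents = append(parents, "deployment/cassandra") // append allocates on first use
    fmt.Println(len(parents), parents)                // 1 [deployment/cassandra]
}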
use \"aws ssm describe-instance-information\" to check the available instances", ec2ID) } } } diff --git a/pkg/cloud/azure/azure-operations.go b/pkg/cloud/azure/azure-operations.go index 7f21b3052..1a8f9ef1c 100644 --- a/pkg/cloud/azure/azure-operations.go +++ b/pkg/cloud/azure/azure-operations.go @@ -20,7 +20,7 @@ func AzureInstanceStop(timeout, delay int, subscriptionID, resourceGroup, azureI if err == nil { vmClient.Authorizer = authorizer } else { - return errors.Errorf("fail to setup authorization, err: %v") + return errors.Errorf("fail to setup authorization, err: %v", err) } log.Info("[Info]: Stopping the instance") @@ -41,7 +41,7 @@ func AzureInstanceStart(timeout, delay int, subscriptionID, resourceGroup, azure if err == nil { vmClient.Authorizer = authorizer } else { - return errors.Errorf("fail to setup authorization, err: %v") + return errors.Errorf("fail to setup authorization, err: %v", err) } log.Info("[Info]: Starting back the instance to running state") diff --git a/pkg/generic/container-kill/environment/environment.go b/pkg/generic/container-kill/environment/environment.go index 270b0f22d..7b59f4ccb 100644 --- a/pkg/generic/container-kill/environment/environment.go +++ b/pkg/generic/container-kill/environment/environment.go @@ -61,6 +61,5 @@ func InitialiseChaosVariables(chaosDetails *types.ChaosDetails, experimentDetail chaosDetails.AppDetail = appDetails chaosDetails.JobCleanupPolicy = common.Getenv("JOB_CLEANUP_POLICY", "retain") chaosDetails.ProbeImagePullPolicy = experimentDetails.LIBImagePullPolicy - chaosDetails.ParentsResources = []string{} chaosDetails.Targets = []v1alpha1.TargetDetails{} } diff --git a/pkg/generic/disk-fill/environment/environment.go b/pkg/generic/disk-fill/environment/environment.go index 86a99ce0b..19d585c35 100644 --- a/pkg/generic/disk-fill/environment/environment.go +++ b/pkg/generic/disk-fill/environment/environment.go @@ -61,5 +61,4 @@ func InitialiseChaosVariables(chaosDetails *types.ChaosDetails, experimentDetail chaosDetails.AppDetail = appDetails chaosDetails.JobCleanupPolicy = common.Getenv("JOB_CLEANUP_POLICY", "retain") chaosDetails.ProbeImagePullPolicy = experimentDetails.LIBImagePullPolicy - chaosDetails.ParentsResources = []string{} } diff --git a/pkg/generic/network-chaos/environment/environment.go b/pkg/generic/network-chaos/environment/environment.go index d9002469d..cefa67aa8 100644 --- a/pkg/generic/network-chaos/environment/environment.go +++ b/pkg/generic/network-chaos/environment/environment.go @@ -68,6 +68,5 @@ func InitialiseChaosVariables(chaosDetails *types.ChaosDetails, experimentDetail chaosDetails.AppDetail = appDetails chaosDetails.JobCleanupPolicy = common.Getenv("JOB_CLEANUP_POLICY", "retain") chaosDetails.ProbeImagePullPolicy = experimentDetails.LIBImagePullPolicy - chaosDetails.ParentsResources = []string{} chaosDetails.Targets = []v1alpha1.TargetDetails{} } diff --git a/pkg/generic/pod-cpu-hog-exec/environment/environment.go b/pkg/generic/pod-cpu-hog-exec/environment/environment.go index b13032014..e66887dec 100644 --- a/pkg/generic/pod-cpu-hog-exec/environment/environment.go +++ b/pkg/generic/pod-cpu-hog-exec/environment/environment.go @@ -31,7 +31,7 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.Timeout, _ = strconv.Atoi(common.Getenv("STATUS_CHECK_TIMEOUT", "180")) experimentDetails.TargetPods = common.Getenv("TARGET_PODS", "") experimentDetails.ChaosInjectCmd = common.Getenv("CHAOS_INJECT_COMMAND", "md5sum /dev/zero") - experimentDetails.ChaosKillCmd = 
common.Getenv("CHAOS_KILL_COMMAND", "killall md5sum") + experimentDetails.ChaosKillCmd = common.Getenv("CHAOS_KILL_COMMAND", "kill $(find /proc -name exe -lname '*/md5sum' 2>&1 | grep -v 'Permission denied' | awk -F/ '{print $(NF-1)}')") experimentDetails.TargetContainer = common.Getenv("TARGET_CONTAINER", "") experimentDetails.Sequence = common.Getenv("SEQUENCE", "parallel") experimentDetails.TerminationGracePeriodSeconds, _ = strconv.Atoi(common.Getenv("TERMINATION_GRACE_PERIOD_SECONDS", "")) @@ -57,6 +57,5 @@ func InitialiseChaosVariables(chaosDetails *types.ChaosDetails, experimentDetail chaosDetails.Delay = experimentDetails.Delay chaosDetails.AppDetail = appDetails chaosDetails.ProbeImagePullPolicy = experimentDetails.LIBImagePullPolicy - chaosDetails.ParentsResources = []string{} chaosDetails.Targets = []v1alpha1.TargetDetails{} } diff --git a/pkg/generic/pod-delete/environment/environment.go b/pkg/generic/pod-delete/environment/environment.go index 312babc05..3b66d3270 100644 --- a/pkg/generic/pod-delete/environment/environment.go +++ b/pkg/generic/pod-delete/environment/environment.go @@ -58,5 +58,4 @@ func InitialiseChaosVariables(chaosDetails *types.ChaosDetails, experimentDetail chaosDetails.ProbeImagePullPolicy = experimentDetails.LIBImagePullPolicy chaosDetails.Randomness, _ = strconv.ParseBool(common.Getenv("RANDOMNESS", "false")) chaosDetails.Targets = []v1alpha1.TargetDetails{} - chaosDetails.ParentsResources = []string{} } diff --git a/pkg/generic/pod-dns-chaos/environment/environment.go b/pkg/generic/pod-dns-chaos/environment/environment.go index 939fe4523..1d0aae092 100644 --- a/pkg/generic/pod-dns-chaos/environment/environment.go +++ b/pkg/generic/pod-dns-chaos/environment/environment.go @@ -79,5 +79,4 @@ func InitialiseChaosVariables(chaosDetails *types.ChaosDetails, experimentDetail chaosDetails.AppDetail = appDetails chaosDetails.JobCleanupPolicy = common.Getenv("JOB_CLEANUP_POLICY", "retain") chaosDetails.ProbeImagePullPolicy = experimentDetails.LIBImagePullPolicy - chaosDetails.ParentsResources = []string{} } diff --git a/pkg/generic/pod-memory-hog-exec/environment/environment.go b/pkg/generic/pod-memory-hog-exec/environment/environment.go index 9f0a63d0c..69ae5f00f 100644 --- a/pkg/generic/pod-memory-hog-exec/environment/environment.go +++ b/pkg/generic/pod-memory-hog-exec/environment/environment.go @@ -30,7 +30,7 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.Delay, _ = strconv.Atoi(common.Getenv("STATUS_CHECK_DELAY", "2")) experimentDetails.Timeout, _ = strconv.Atoi(common.Getenv("STATUS_CHECK_TIMEOUT", "180")) experimentDetails.TargetPods = common.Getenv("TARGET_PODS", "") - experimentDetails.ChaosKillCmd = common.Getenv("CHAOS_KILL_COMMAND", "killall dd") + experimentDetails.ChaosKillCmd = common.Getenv("CHAOS_KILL_COMMAND", "kill $(find /proc -name exe -lname '*/dd' 2>&1 | grep -v 'Permission denied' | awk -F/ '{print $(NF-1)}' | head -n 1)") experimentDetails.LIBImagePullPolicy = common.Getenv("LIB_IMAGE_PULL_POLICY", "Always") experimentDetails.TargetContainer = common.Getenv("TARGET_CONTAINER", "") experimentDetails.Sequence = common.Getenv("SEQUENCE", "parallel") @@ -56,6 +56,5 @@ func InitialiseChaosVariables(chaosDetails *types.ChaosDetails, experimentDetail chaosDetails.Delay = experimentDetails.Delay chaosDetails.AppDetail = appDetails chaosDetails.ProbeImagePullPolicy = experimentDetails.LIBImagePullPolicy - chaosDetails.ParentsResources = []string{} chaosDetails.Targets = []v1alpha1.TargetDetails{} } diff 
diff --git a/pkg/generic/stress-chaos/environment/environment.go b/pkg/generic/stress-chaos/environment/environment.go
index 9990b6397..55fba8460 100644
--- a/pkg/generic/stress-chaos/environment/environment.go
+++ b/pkg/generic/stress-chaos/environment/environment.go
@@ -76,6 +76,5 @@ func InitialiseChaosVariables(chaosDetails *types.ChaosDetails, experimentDetail
 	chaosDetails.AppDetail = appDetails
 	chaosDetails.JobCleanupPolicy = common.Getenv("JOB_CLEANUP_POLICY", "retain")
 	chaosDetails.ProbeImagePullPolicy = experimentDetails.LIBImagePullPolicy
-	chaosDetails.ParentsResources = []string{}
 	chaosDetails.Targets = []v1alpha1.TargetDetails{}
 }
diff --git a/pkg/kafka/environment/environment.go b/pkg/kafka/environment/environment.go
index 8637f9ada..e4ed45d6d 100644
--- a/pkg/kafka/environment/environment.go
+++ b/pkg/kafka/environment/environment.go
@@ -39,8 +39,8 @@ func GetENV(kafkaDetails *kafkaTypes.ExperimentDetails) {
 
 	kafkaDetails.ChaoslibDetail = &ChaoslibDetail
 	kafkaDetails.KafkaKind = common.Getenv("KAFKA_KIND", "statefulset")
-	kafkaDetails.KafkaLivenessStream = common.Getenv("KAFKA_LIVENESS_STREAM", "enabled")
-	kafkaDetails.KafkaLivenessImage = common.Getenv("KAFKA_LIVENESS_IMAGE", "litmuschaos/kafka-client:ci")
+	kafkaDetails.KafkaLivenessStream = common.Getenv("KAFKA_LIVENESS_STREAM", "enable")
+	kafkaDetails.KafkaLivenessImage = common.Getenv("KAFKA_LIVENESS_IMAGE", "litmuschaos/kafka-client:latest")
 	kafkaDetails.KafkaConsumerTimeout, _ = strconv.Atoi(common.Getenv("KAFKA_CONSUMER_TIMEOUT", "60000"))
 	kafkaDetails.KafkaInstanceName = common.Getenv("KAFKA_INSTANCE_NAME", "kafka")
 	kafkaDetails.KafkaNamespace = common.Getenv("KAFKA_NAMESPACE", "default")
@@ -79,6 +79,5 @@ func InitialiseChaosVariables(chaosDetails *types.ChaosDetails, kafkaDetails *ka
 	chaosDetails.AppDetail = appDetails
 	chaosDetails.ProbeImagePullPolicy = kafkaDetails.ChaoslibDetail.LIBImagePullPolicy
 	chaosDetails.Randomness, _ = strconv.ParseBool(common.Getenv("RANDOMNESS", "false"))
-	chaosDetails.ParentsResources = []string{}
 	chaosDetails.Targets = []v1alpha1.TargetDetails{}
 }
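Note: every environment.go in this series funnels configuration through common.Getenv(key, fallback). The helper itself is not part of this diff; a minimal sketch consistent with how it is called (the real implementation in pkg/utils/common may differ, e.g. it may also treat an empty value as unset):

    package main

    import (
    	"fmt"
    	"os"
    )

    // Getenv returns the value of key, or fallback when the variable is unset.
    func Getenv(key, fallback string) string {
    	if value, ok := os.LookupEnv(key); ok {
    		return value
    	}
    	return fallback
    }

    func main() {
    	fmt.Println(Getenv("SEQUENCE", "parallel"))
    }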
diff --git a/pkg/result/chaosresult.go b/pkg/result/chaosresult.go
index 23210e7fd..988c2618f 100644
--- a/pkg/result/chaosresult.go
+++ b/pkg/result/chaosresult.go
@@ -15,7 +15,7 @@ import (
 	"github.com/litmuschaos/litmus-go/pkg/log"
 	"github.com/litmuschaos/litmus-go/pkg/probe"
 	"github.com/litmuschaos/litmus-go/pkg/types"
-	"github.com/openebs/maya/pkg/util/retry"
+	"github.com/litmuschaos/litmus-go/pkg/utils/retry"
 	"github.com/pkg/errors"
 	k8serrors "k8s.io/apimachinery/pkg/api/errors"
 	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -25,23 +25,23 @@ import (
 func ChaosResult(chaosDetails *types.ChaosDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, state string) error {
 
 	experimentLabel := map[string]string{}
 
-	// It will list all the chaos-result with matching label
-	// it will retries until it got chaos result list or met the timeout(3 mins)
-	// Note: We have added labels inside chaos result and looking for matching labels to list the chaos-result
-	var resultList *v1alpha1.ChaosResultList
-	err := retry.
+	// It try to get the chaosresult, if available
+	// it will retries until it got chaos result or met the timeout(3 mins)
+	var result *v1alpha1.ChaosResult
+	isResultAvailable := false
+	if err := retry.
 		Times(90).
 		Wait(2 * time.Second).
 		Try(func(attempt uint) error {
-			result, err := clients.LitmusClient.ChaosResults(chaosDetails.ChaosNamespace).List(v1.ListOptions{LabelSelector: "name=" + resultDetails.Name})
-			if err != nil && len(result.Items) == 0 {
-				return errors.Errorf("unable to find the chaosresult with matching labels, err: %v", err)
+			resultObj, err := clients.LitmusClient.ChaosResults(chaosDetails.ChaosNamespace).Get(resultDetails.Name, v1.GetOptions{})
+			if err != nil && !k8serrors.IsNotFound(err) {
+				return errors.Errorf("unable to get %v chaosresult in %v namespace, err: %v", resultDetails.Name, chaosDetails.ChaosNamespace, err)
+			} else if err == nil {
+				result = resultObj
+				isResultAvailable = true
 			}
-			resultList = result
 			return nil
-		})
-
-	if err != nil {
+		}); err != nil {
 		return err
 	}
@@ -55,24 +55,23 @@ func ChaosResult(chaosDetails *types.ChaosDetails, clients clients.ClientSets, r
 		}
 		experimentLabel = chaosPod.Labels
 	}
-	experimentLabel["name"] = resultDetails.Name
 	experimentLabel["chaosUID"] = string(chaosDetails.ChaosUID)
 
-	// if there is no chaos-result with given label, it will create a new chaos-result
-	if len(resultList.Items) == 0 {
+	// if there is no chaos-result with given name, it will create a new chaos-result
+	if !isResultAvailable {
 		return InitializeChaosResult(chaosDetails, clients, resultDetails, experimentLabel)
 	}
 
 	// the chaos-result is already present with matching labels
 	// it will patch the new parameters in the same chaos-result
 	if state == "SOT" {
-		updateHistory(&resultList.Items[0])
-		return PatchChaosResult(&resultList.Items[0], clients, chaosDetails, resultDetails, experimentLabel)
+		updateHistory(result)
+		return PatchChaosResult(result, clients, chaosDetails, resultDetails, experimentLabel)
 	}
 
 	// it will patch the chaos-result in the end of experiment
 	resultDetails.Phase = v1alpha1.ResultPhaseCompleted
-	return PatchChaosResult(&resultList.Items[0], clients, chaosDetails, resultDetails, experimentLabel)
+	return PatchChaosResult(result, clients, chaosDetails, resultDetails, experimentLabel)
 }
 
 //InitializeChaosResult create the chaos result
@@ -117,7 +116,7 @@ func InitializeChaosResult(chaosDetails *types.ChaosDetails, clients clients.Cli
 	if k8serrors.IsAlreadyExists(err) {
 		chaosResult, err = clients.LitmusClient.ChaosResults(chaosDetails.ChaosNamespace).Get(resultDetails.Name, v1.GetOptions{})
 		if err != nil {
-			return errors.Errorf("Unable to find the chaosresult with name %v, err: %v", resultDetails.Name, err)
+			return errors.Errorf("Unable to find the chaosresult with name %v in %v namespace, err: %v", resultDetails.Name, chaosDetails.ChaosNamespace, err)
 		}
 
 		// updating the chaosresult with new values
@@ -125,9 +124,7 @@ func InitializeChaosResult(chaosDetails *types.ChaosDetails, clients clients.Cli
 		if err != nil {
 			return err
 		}
-
 	}
-
 	return nil
 }
@@ -217,7 +214,6 @@ func PatchChaosResult(result *v1alpha1.ChaosResult, clients clients.ClientSets,
 func SetResultUID(resultDetails *types.ResultDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error {
 
 	result, err := clients.LitmusClient.ChaosResults(chaosDetails.ChaosNamespace).Get(resultDetails.Name, v1.GetOptions{})
-
 	if err != nil {
 		return err
 	}
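Note: the core of the chaosresult change is to look the result up by its well-known name instead of listing by label, retry only transient API failures, and treat NotFound as "does not exist yet, create it". A self-contained sketch of that polling contract in plain Go (tryGet and getFn are stand-ins for the retry.Times/Wait/Try chain and the typed Get call):

    package main

    import (
    	"errors"
    	"fmt"
    	"time"
    )

    var errNotFound = errors.New("not found")

    // tryGet polls getFn, retrying only transient errors; "not found" is a
    // terminal, non-error outcome, because the caller will create the
    // object instead of waiting for it.
    func tryGet(attempts int, wait time.Duration, getFn func() (string, error)) (string, bool, error) {
    	for i := 0; i < attempts; i++ {
    		obj, err := getFn()
    		if err == nil {
    			return obj, true, nil // found: stop polling
    		}
    		if errors.Is(err, errNotFound) {
    			return "", false, nil // missing is fine: caller creates it
    		}
    		time.Sleep(wait) // transient API error: retry
    	}
    	return "", false, fmt.Errorf("timed out after %d attempts", attempts)
    }

    func main() {
    	obj, ok, err := tryGet(90, 2*time.Second, func() (string, error) {
    		return "", errNotFound // stand-in for the ChaosResults Get call
    	})
    	fmt.Println(obj, ok, err)
    }

Returning nil on NotFound ends the retry loop immediately, so a fresh experiment does not burn the full three-minute budget (90 attempts, 2s apart) before creating its result.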
diff --git a/pkg/status/application.go b/pkg/status/application.go
index b09d32c62..08ff57fde 100644
--- a/pkg/status/application.go
+++ b/pkg/status/application.go
@@ -277,7 +277,7 @@ func WaitForCompletion(appNs, appLabel string, clients clients.ClientSets, durat
 }
 
 // CheckHelperStatus checks the status of the helper pod
-// and wait until the helper pod comes to one of the {running,completed} states
+// and wait until the helper pod comes to one of the {running,completed,failed} states
 func CheckHelperStatus(appNs, appLabel string, timeout, delay int, clients clients.ClientSets) error {
 
 	return retry.
@@ -291,13 +291,13 @@ func CheckHelperStatus(appNs, appLabel string, timeout, delay int, clients clien
 			for _, pod := range podList.Items {
 				podStatus := string(pod.Status.Phase)
 				switch strings.ToLower(podStatus) {
-				case "running", "succeeded":
+				case "running", "succeeded", "failed":
 					log.Infof("%v helper pod is in %v state", pod.Name, podStatus)
 				default:
 					return errors.Errorf("%v pod is in %v state", pod.Name, podStatus)
 				}
 				for _, container := range pod.Status.ContainerStatuses {
-					if container.State.Terminated != nil && container.State.Terminated.Reason != "Completed" {
+					if container.State.Terminated != nil && container.State.Terminated.Reason != "Completed" && container.State.Terminated.Reason != "Error" {
 						return errors.Errorf("container is terminated with %v reason", container.State.Terminated.Reason)
 					}
 				}
diff --git a/pkg/utils/common/nodes.go b/pkg/utils/common/nodes.go
index 2eee5552f..844a79309 100644
--- a/pkg/utils/common/nodes.go
+++ b/pkg/utils/common/nodes.go
@@ -3,6 +3,7 @@ package common
 import (
 	"math/rand"
 	"strconv"
+	"strings"
 	"time"
 
 	"github.com/litmuschaos/litmus-go/pkg/clients"
@@ -19,14 +20,14 @@ var err error
 
 //GetNodeList check for the availibilty of the application node for the chaos execution
 // if the application node is not defined it will derive the random target node list using node affected percentage
-func GetNodeList(nodeName, nodeLabel string, nodeAffPerc int, clients clients.ClientSets) ([]string, error) {
+func GetNodeList(nodeNames, nodeLabel string, nodeAffPerc int, clients clients.ClientSets) ([]string, error) {
 
 	var nodeList []string
 	var nodes *apiv1.NodeList
 
-	if nodeName != "" {
-		nodeList = append(nodeList, nodeName)
-		return nodeList, nil
+	if nodeNames != "" {
+		targetNodesList := strings.Split(nodeNames, ",")
+		return targetNodesList, nil
 	}
 
 	switch nodeLabel {
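Note: GetNodeList now accepts a comma-separated list of target nodes. Worth flagging that, unlike the TARGET_PODS path below, this split does not trim whitespace, so "node-1, node-2" yields a second entry of " node-2". A defensive variant (an assumption about hardening, not what this diff does):

    package main

    import (
    	"fmt"
    	"strings"
    )

    // splitTargets is a hypothetical helper: split a comma-separated env
    // value and drop stray whitespace and empty entries.
    func splitTargets(raw string) []string {
    	var out []string
    	for _, name := range strings.Split(raw, ",") {
    		if name = strings.TrimSpace(name); name != "" {
    			out = append(out, name)
    		}
    	}
    	return out
    }

    func main() {
    	fmt.Println(splitTargets("node-1, node-2 ,node-3")) // [node-1 node-2 node-3]
    }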
diff --git a/pkg/utils/common/pods.go b/pkg/utils/common/pods.go
index 204550d37..44a87fd09 100644
--- a/pkg/utils/common/pods.go
+++ b/pkg/utils/common/pods.go
@@ -2,7 +2,6 @@ package common
 
 import (
 	"math/rand"
-	"strconv"
 	"strings"
 	"time"
 
@@ -100,18 +99,18 @@ func GetChaosPodResourceRequirements(podName, containerName, namespace string, c
 
 // VerifyExistanceOfPods check the availibility of list of pods
 func VerifyExistanceOfPods(namespace, pods string, clients clients.ClientSets) (bool, error) {
 
-	if pods == "" {
+	if strings.TrimSpace(pods) == "" {
 		return false, nil
 	}
 
-	podList := strings.Split(pods, ",")
+	podList := strings.Split(strings.TrimSpace(pods), ",")
 	for index := range podList {
 		isPodsAvailable, err := CheckForAvailibiltyOfPod(namespace, podList[index], clients)
 		if err != nil {
 			return false, err
 		}
 		if !isPodsAvailable {
-			return isPodsAvailable, nil
+			return isPodsAvailable, errors.Errorf("%v pod is not available in %v namespace", podList[index], namespace)
 		}
 	}
 	return true, nil
@@ -120,12 +119,7 @@
 //GetPodList check for the availibilty of the target pod for the chaos execution
 // if the target pod is not defined it will derive the random target pod list using pod affected percentage
 func GetPodList(targetPods string, podAffPerc int, clients clients.ClientSets, chaosDetails *types.ChaosDetails) (core_v1.PodList, error) {
-	realpods := core_v1.PodList{}
-	podList, err := clients.KubeClient.CoreV1().Pods(chaosDetails.AppDetail.Namespace).List(v1.ListOptions{LabelSelector: chaosDetails.AppDetail.Label})
-	if err != nil || len(podList.Items) == 0 {
-		return core_v1.PodList{}, errors.Wrapf(err, "Failed to find the pod with matching labels in %v namespace", chaosDetails.AppDetail.Namespace)
-	}
-
+	finalPods := core_v1.PodList{}
 	isPodsAvailable, err := VerifyExistanceOfPods(chaosDetails.AppDetail.Namespace, targetPods, clients)
 	if err != nil {
 		return core_v1.PodList{}, err
@@ -139,17 +133,20 @@ func GetPodList(targetPods string, podAffPerc int, clients clients.ClientSets, c
 		if err != nil {
 			return core_v1.PodList{}, err
 		}
-		realpods.Items = append(realpods.Items, podList.Items...)
+		finalPods.Items = append(finalPods.Items, podList.Items...)
 	default:
-		nonChaosPods := FilterNonChaosPods(*podList, chaosDetails)
-		realpods, err = GetTargetPodsWhenTargetPodsENVNotSet(podAffPerc, clients, nonChaosPods, chaosDetails)
+		nonChaosPods, err := FilterNonChaosPods(clients, chaosDetails)
 		if err != nil {
 			return core_v1.PodList{}, err
 		}
+		podList, err := GetTargetPodsWhenTargetPodsENVNotSet(podAffPerc, clients, nonChaosPods, chaosDetails)
+		if err != nil {
+			return core_v1.PodList{}, err
+		}
+		finalPods.Items = append(finalPods.Items, podList.Items...)
 	}
 
-	log.Infof("[Chaos]:Number of pods targeted: %v", strconv.Itoa(len(realpods.Items)))
-
-	return realpods, nil
+	log.Infof("[Chaos]:Number of pods targeted: %v", len(finalPods.Items))
+	return finalPods, nil
 }
 
 // CheckForAvailibiltyOfPod check the availibility of the specified pod
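Note: VerifyExistanceOfPods (hunks above) now trims the TARGET_PODS value and, instead of silently returning false, names the first pod that is missing. A self-contained toy with the same contract (exists stands in for the real per-pod API lookup):

    package main

    import (
    	"fmt"
    	"strings"
    )

    // verifyAll returns (false, nil) for an empty spec, an error naming the
    // first missing entry, or (true, nil) when every named item exists.
    func verifyAll(spec string, exists func(string) bool) (bool, error) {
    	if strings.TrimSpace(spec) == "" {
    		return false, nil // no explicit targets: caller picks randomly
    	}
    	for _, name := range strings.Split(strings.TrimSpace(spec), ",") {
    		if !exists(name) {
    			return false, fmt.Errorf("%v pod is not available", name)
    		}
    	}
    	return true, nil
    }

    func main() {
    	known := map[string]bool{"nginx-a": true}
    	ok, err := verifyAll("nginx-a,nginx-b", func(n string) bool { return known[n] })
    	fmt.Println(ok, err) // false, "nginx-b pod is not available"
    }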
@@ -170,7 +167,13 @@
 //FilterNonChaosPods remove the chaos pods(operator, runner) for the podList
 // it filter when the applabels are not defined and it will select random pods from appns
-func FilterNonChaosPods(podList core_v1.PodList, chaosDetails *types.ChaosDetails) core_v1.PodList {
+func FilterNonChaosPods(clients clients.ClientSets, chaosDetails *types.ChaosDetails) (core_v1.PodList, error) {
+	podList, err := clients.KubeClient.CoreV1().Pods(chaosDetails.AppDetail.Namespace).List(v1.ListOptions{LabelSelector: chaosDetails.AppDetail.Label})
+	if err != nil {
+		return core_v1.PodList{}, err
+	} else if len(podList.Items) == 0 {
+		return core_v1.PodList{}, errors.Wrapf(err, "Failed to find the pod with matching labels in %v namespace", chaosDetails.AppDetail.Namespace)
+	}
 	nonChaosPods := core_v1.PodList{}
 	// ignore chaos pods
 	for index, pod := range podList.Items {
@@ -178,41 +181,35 @@
 			nonChaosPods.Items = append(nonChaosPods.Items, podList.Items[index])
 		}
 	}
-	return nonChaosPods
+	return nonChaosPods, nil
 }
 
 // GetTargetPodsWhenTargetPodsENVSet derive the specific target pods, if TARGET_PODS env is set
 func GetTargetPodsWhenTargetPodsENVSet(targetPods string, clients clients.ClientSets, chaosDetails *types.ChaosDetails) (core_v1.PodList, error) {
-	podList, err := clients.KubeClient.CoreV1().Pods(chaosDetails.AppDetail.Namespace).List(v1.ListOptions{LabelSelector: chaosDetails.AppDetail.Label})
-	if err != nil || len(podList.Items) == 0 {
-		return core_v1.PodList{}, errors.Wrapf(err, "Failed to find the pods with matching labels in %v namespace", chaosDetails.AppDetail.Namespace)
-	}
 
 	targetPodsList := strings.Split(targetPods, ",")
 	realPods := core_v1.PodList{}
 
-	for _, pod := range podList.Items {
-		for index := range targetPodsList {
-			if targetPodsList[index] == pod.Name {
-				parentName, err := annotation.GetParentName(clients, pod, chaosDetails)
-				if err != nil {
-					return core_v1.PodList{}, err
-				}
-				switch chaosDetails.AppDetail.AnnotationCheck {
-				case true:
-					isParentAnnotated, err := annotation.IsParentAnnotated(clients, parentName, chaosDetails)
-					if err != nil {
-						return core_v1.PodList{}, err
-					}
-					if !isParentAnnotated {
-						return core_v1.PodList{}, errors.Errorf("%v target application is not annotated", parentName)
-					}
-				}
-				realPods.Items = append(realPods.Items, pod)
-				setParentName(parentName, chaosDetails)
-				log.Infof("[Info]: chaos candidate of kind: %v, name: %v, namespace: %v", chaosDetails.AppDetail.Kind, parentName, chaosDetails.AppDetail.Namespace)
+	for index := range targetPodsList {
+		pod, err := clients.KubeClient.CoreV1().Pods(chaosDetails.AppDetail.Namespace).Get(strings.TrimSpace(targetPodsList[index]), v1.GetOptions{})
+		if err != nil {
+			return core_v1.PodList{}, errors.Wrapf(err, "Failed to get %v pod in %v namespace", targetPodsList[index], chaosDetails.AppDetail.Namespace)
+		}
+		switch chaosDetails.AppDetail.AnnotationCheck {
+		case true:
+			parentName, err := annotation.GetParentName(clients, *pod, chaosDetails)
+			if err != nil {
+				return core_v1.PodList{}, err
+			}
+			isParentAnnotated, err := annotation.IsParentAnnotated(clients, parentName, chaosDetails)
+			if err != nil {
+				return core_v1.PodList{}, err
+			}
+			if !isParentAnnotated {
+				return core_v1.PodList{}, errors.Errorf("%v target application is not annotated", parentName)
 			}
 		}
+		realPods.Items = append(realPods.Items, *pod)
 	}
 	return realPods, nil
 }
@@ -234,8 +231,8 @@ func SetTargets(target, chaosStatus, kind string, chaosDetails *types.ChaosDetai
 	chaosDetails.Targets = append(chaosDetails.Targets, newTarget)
 }
 
-// setParentName set the parent name in chaosdetails struct
-func setParentName(parentName string, chaosDetails *types.ChaosDetails) {
+// SetParentName set the parent name in chaosdetails struct
+func SetParentName(parentName string, chaosDetails *types.ChaosDetails) {
 	if chaosDetails.ParentsResources == nil {
 		chaosDetails.ParentsResources = []string{parentName}
 	} else {
@@ -253,25 +250,21 @@ func GetTargetPodsWhenTargetPodsENVNotSet(podAffPerc int, clients clients.Client
 	filteredPods := core_v1.PodList{}
 	realPods := core_v1.PodList{}
 
 	for _, pod := range nonChaosPods.Items {
-		parentName, err := annotation.GetParentName(clients, pod, chaosDetails)
-		if err != nil {
-			return core_v1.PodList{}, err
-		}
 		switch chaosDetails.AppDetail.AnnotationCheck {
 		case true:
+			parentName, err := annotation.GetParentName(clients, pod, chaosDetails)
+			if err != nil {
+				return core_v1.PodList{}, err
+			}
 			isParentAnnotated, err := annotation.IsParentAnnotated(clients, parentName, chaosDetails)
 			if err != nil {
 				return core_v1.PodList{}, err
 			}
 			if isParentAnnotated {
 				filteredPods.Items = append(filteredPods.Items, pod)
-				setParentName(parentName, chaosDetails)
-				log.Infof("[Info]: chaos candidate of kind: %v, name: %v, namespace: %v", chaosDetails.AppDetail.Kind, parentName, chaosDetails.AppDetail.Namespace)
 			}
 		default:
 			filteredPods.Items = append(filteredPods.Items, pod)
-			setParentName(parentName, chaosDetails)
-			log.Infof("[Info]: chaos candidate of kind: %v, name: %v, namespace: %v", chaosDetails.AppDetail.Kind, parentName, chaosDetails.AppDetail.Namespace)
 		}
 	}
@@ -279,7 +272,7 @@ func GetTargetPodsWhenTargetPodsENVNotSet(podAffPerc int, clients clients.Client
 		return filteredPods, errors.Errorf("No target pod found")
 	}
 
-	newPodListLength := math.Maximum(1, math.Adjustment(podAffPerc, len(filteredPods.Items)))
+	newPodListLength := math.Maximum(1, math.Adjustment(math.Minimum(podAffPerc, 100), len(filteredPods.Items)))
 	rand.Seed(time.Now().UnixNano())
 
 	// it will generate the random podlist
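Note: the final hunk clamps the affected-percentage before computing how many pods to target, so PODS_AFFECTED_PERC values above 100 can no longer request more pods than exist. Assuming math.Adjustment is the usual integer percentage helper (perc * total / 100) and Maximum/Minimum behave as named, the arithmetic works out as:

    package main

    import "fmt"

    // targetCount reproduces the target-count arithmetic with the clamp
    // inlined (helper semantics assumed from their names).
    func targetCount(podAffPerc, totalPods int) int {
    	if podAffPerc > 100 { // math.Minimum(podAffPerc, 100)
    		podAffPerc = 100
    	}
    	count := (podAffPerc * totalPods) / 100 // math.Adjustment (assumed)
    	if count < 1 { // math.Maximum(1, ...): always target at least one pod
    		count = 1
    	}
    	return count
    }

    func main() {
    	// Before the clamp, 250% of 4 candidates asked for 10 pods;
    	// now it caps at 4.
    	fmt.Println(targetCount(250, 4)) // 4
    	fmt.Println(targetCount(0, 4))   // 1
    }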