From b6f7cfd9a727b39f1ce56f0c7a605f4962b51de5 Mon Sep 17 00:00:00 2001
From: Shao Wang <77665902+Electronic-Waste@users.noreply.github.com>
Date: Wed, 7 Aug 2024 01:50:39 +0800
Subject: [PATCH] [SDK] test: Add e2e test for tune function. (#2399)

* fix(sdk): fix error field metrics_collector in tune function.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): Add e2e tests for tune function.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): add missing field parameters.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* refactor(test/sdk): add run-e2e-tune-api.py.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): delete tune testing code in run-e2e-experiment.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): add blank lines.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): add verbose and temporarily delete e2e-experiment test.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): add namespace_labels.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): add time.sleep(5).

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): add error output.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): build random image for tune.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): delete extra debug log.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* refactor(test/sdk): create separate workflow for tune.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): change api to API.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): change the permission of scripts.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): delete exit code & comment image pulling.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): delete image pulling phase.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): refactor workflow file to use template.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): mark experiments and trial-images as not required.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): pass tune-api param to setup-minikube.sh.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): fix err in template-e2e-test.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): add debug logs.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

* test(sdk): reorder params and delete logs.

Signed-off-by: Electronic-Waste <2690692950@qq.com>

---------

Signed-off-by: Electronic-Waste <2690692950@qq.com>
---
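Note: a minimal sketch of the tune() call that the metrics_collector fix
affects (illustrative values, mirroring the test case in this patch; assumes
a running Katib control plane and the SDK from this branch):

    from kubeflow.katib import KatibClient, search

    def objective(parameters):
        # The default StdOut metrics collector parses "result=<value>" lines.
        result = 4 * int(parameters["a"]) - float(parameters["b"]) ** 2
        print(f"result={result}")

    KatibClient().tune(
        name="tune-example",
        namespace="default",
        objective=objective,
        parameters={
            "a": search.int(min=10, max=20),
            "b": search.double(min=0.1, max=0.2),
        },
        objective_metric_name="result",
        max_trial_count=4,
        # Before this fix, tune() assigned the collector to the non-existent
        # attribute `metrics_collector`; the Experiment model expects
        # `metrics_collector_spec` (serialized as `metricsCollectorSpec`).
        metrics_collector_config={"kind": "StdOut"},
    )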
 .github/workflows/e2e-test-tune-api.yaml      |  34 +++++
 .../workflows/template-e2e-test/action.yaml   |  19 ++-
 .../kubeflow/katib/api/katib_client.py        |   2 +-
 .../v1beta1/scripts/gh-actions/build-load.sh  |  13 +-
 .../scripts/gh-actions/run-e2e-experiment.py  | 139 +----------------
 .../scripts/gh-actions/run-e2e-tune-api.py    |  97 ++++++++++++
 .../scripts/gh-actions/run-e2e-tune-api.sh    |  38 +++++
 .../scripts/gh-actions/setup-minikube.sh      |   7 +-
 test/e2e/v1beta1/scripts/gh-actions/verify.py | 141 ++++++++++++++++++
 9 files changed, 341 insertions(+), 149 deletions(-)
 create mode 100644 .github/workflows/e2e-test-tune-api.yaml
 create mode 100644 test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py
 create mode 100755 test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.sh
 create mode 100644 test/e2e/v1beta1/scripts/gh-actions/verify.py

diff --git a/.github/workflows/e2e-test-tune-api.yaml b/.github/workflows/e2e-test-tune-api.yaml
new file mode 100644
index 00000000000..e1f37a3701b
--- /dev/null
+++ b/.github/workflows/e2e-test-tune-api.yaml
@@ -0,0 +1,34 @@
+name: E2E Test with tune API
+
+on:
+  pull_request:
+    paths-ignore:
+      - "pkg/ui/v1beta1/frontend/**"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  e2e:
+    runs-on: ubuntu-22.04
+    timeout-minutes: 120
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Test Env
+        uses: ./.github/workflows/template-setup-e2e-test
+        with:
+          kubernetes-version: ${{ matrix.kubernetes-version }}
+
+      - name: Run e2e test with tune API
+        uses: ./.github/workflows/template-e2e-test
+        with:
+          tune-api: true
+
+    strategy:
+      fail-fast: false
+      matrix:
+        # Detail: https://hub.docker.com/r/kindest/node
+        kubernetes-version: ["v1.27.11", "v1.28.7", "v1.29.2"]
diff --git a/.github/workflows/template-e2e-test/action.yaml b/.github/workflows/template-e2e-test/action.yaml
index ef1ca26064d..7c9598df04b 100644
--- a/.github/workflows/template-e2e-test/action.yaml
+++ b/.github/workflows/template-e2e-test/action.yaml
@@ -4,15 +4,17 @@ description: Run e2e test using the minikube cluster
 
 inputs:
   experiments:
-    required: true
+    required: false
     description: comma delimited experiment name
+    default: ""
   training-operator:
     required: false
     description: whether to deploy training-operator or not
     default: false
   trial-images:
-    required: true
+    required: false
     description: comma delimited trial image name
+    default: ""
   katib-ui:
     required: true
     description: whether to deploy katib-ui or not
@@ -21,13 +23,17 @@ inputs:
     required: false
     description: mysql or postgres
     default: mysql
+  tune-api:
+    required: true
+    description: whether to execute tune-api test or not
+    default: false
 
 runs:
   using: composite
   steps:
     - name: Setup Minikube Cluster
       shell: bash
-      run: ./test/e2e/v1beta1/scripts/gh-actions/setup-minikube.sh ${{ inputs.katib-ui }} ${{ inputs.trial-images }} ${{ inputs.experiments }}
+      run: ./test/e2e/v1beta1/scripts/gh-actions/setup-minikube.sh ${{ inputs.katib-ui }} ${{ inputs.tune-api }} ${{ inputs.trial-images }} ${{ inputs.experiments }}
 
     - name: Setup Katib
       shell: bash
@@ -35,4 +41,9 @@ runs:
 
     - name: Run E2E Experiment
       shell: bash
-      run: ./test/e2e/v1beta1/scripts/gh-actions/run-e2e-experiment.sh ${{ inputs.experiments }}
+      run: |
+        if "${{ inputs.tune-api }}"; then
+          ./test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.sh
+        else
+          ./test/e2e/v1beta1/scripts/gh-actions/run-e2e-experiment.sh ${{ inputs.experiments }}
+        fi
diff --git a/sdk/python/v1beta1/kubeflow/katib/api/katib_client.py b/sdk/python/v1beta1/kubeflow/katib/api/katib_client.py
index b18a81cad81..ceb3be2ce77 100644
--- a/sdk/python/v1beta1/kubeflow/katib/api/katib_client.py
+++ b/sdk/python/v1beta1/kubeflow/katib/api/katib_client.py
@@ -386,7 +386,7 @@ def tune(
 
         # Add metrics collector to the Katib Experiment.
         # Up to now, We only support parameter `kind`, of which default value is `StdOut`, to specify the kind of metrics collector.
-        experiment.spec.metrics_collector = models.V1beta1MetricsCollectorSpec(
+        experiment.spec.metrics_collector_spec = models.V1beta1MetricsCollectorSpec(
             collector=models.V1beta1CollectorSpec(kind=metrics_collector_config["kind"])
         )
 
diff --git a/test/e2e/v1beta1/scripts/gh-actions/build-load.sh b/test/e2e/v1beta1/scripts/gh-actions/build-load.sh
index 2ce492da79a..cb0ea03cd5a 100755
--- a/test/e2e/v1beta1/scripts/gh-actions/build-load.sh
+++ b/test/e2e/v1beta1/scripts/gh-actions/build-load.sh
@@ -25,9 +25,10 @@ pushd .
 cd "$(dirname "$0")/../../../../.."
 trap popd EXIT
 
-TRIAL_IMAGES=${1:-""}
-EXPERIMENTS=${2:-""}
-DEPLOY_KATIB_UI=${3:-false}
+DEPLOY_KATIB_UI=${1:-false}
+TUNE_API=${2:-false}
+TRIAL_IMAGES=${3:-""}
+EXPERIMENTS=${4:-""}
 
 REGISTRY="docker.io/kubeflowkatib"
 TAG="e2e-test"
@@ -162,6 +163,12 @@ for name in "${TRIAL_IMAGE_ARRAY[@]}"; do
   run "$name" "examples/$VERSION/trial-images/$name/Dockerfile"
 done
 
+# Testing image for tune function
+if "$TUNE_API"; then
+  echo -e "\nPulling and building testing image for tune function..."
+  _build_containers "suggestion-hyperopt" "$CMD_PREFIX/suggestion/hyperopt/$VERSION/Dockerfile"
+fi
+
 echo -e "\nCleanup Build Cache...\n"
 docker buildx prune -f
 
diff --git a/test/e2e/v1beta1/scripts/gh-actions/run-e2e-experiment.py b/test/e2e/v1beta1/scripts/gh-actions/run-e2e-experiment.py
index 26ef2e9f6e2..efbe0539e73 100644
--- a/test/e2e/v1beta1/scripts/gh-actions/run-e2e-experiment.py
+++ b/test/e2e/v1beta1/scripts/gh-actions/run-e2e-experiment.py
@@ -1,6 +1,5 @@
 import argparse
 import logging
-import time
 
 from kubeflow.katib import ApiClient
 from kubeflow.katib import KatibClient
@@ -8,6 +7,7 @@
 from kubeflow.katib.constants import constants
 from kubeflow.katib.utils.utils import FakeResponse
 from kubernetes import client
+from verify import verify_experiment_results
 import yaml
 
 # Experiment timeout is 40 min.
@@ -17,143 +17,6 @@
 logging.basicConfig(level=logging.INFO)
 
 
-def verify_experiment_results(
-    katib_client: KatibClient,
-    experiment: models.V1beta1Experiment,
-    exp_name: str,
-    exp_namespace: str,
-):
-
-    # Get the best objective metric.
-    best_objective_metric = None
-    for metric in experiment.status.current_optimal_trial.observation.metrics:
-        if metric.name == experiment.spec.objective.objective_metric_name:
-            best_objective_metric = metric
-            break
-
-    if best_objective_metric is None:
-        raise Exception(
-            "Unable to get the best metrics for objective: {}. Current Optimal Trial: {}".format(
-                experiment.spec.objective.objective_metric_name,
-                experiment.status.current_optimal_trial,
-            )
-        )
-
-    # Get Experiment Succeeded reason.
-    for c in experiment.status.conditions:
-        if (
-            c.type == constants.EXPERIMENT_CONDITION_SUCCEEDED
-            and c.status == constants.CONDITION_STATUS_TRUE
-        ):
-            succeeded_reason = c.reason
-            break
-
-    trials_completed = experiment.status.trials_succeeded or 0
-    trials_completed += experiment.status.trials_early_stopped or 0
-    max_trial_count = experiment.spec.max_trial_count
-
-    # If Experiment is Succeeded because of Max Trial Reached, all Trials must be completed.
-    if (
-        succeeded_reason == "ExperimentMaxTrialsReached"
-        and trials_completed != max_trial_count
-    ):
-        raise Exception(
-            "All Trials must be Completed. Max Trial count: {}, Experiment status: {}".format(
-                max_trial_count, experiment.status
-            )
-        )
-
-    # If Experiment is Succeeded because of Goal reached, the metrics must be correct.
-    if succeeded_reason == "ExperimentGoalReached" and (
-        (
-            experiment.spec.objective.type == "minimize"
-            and float(best_objective_metric.min) > float(experiment.spec.objective.goal)
-        )
-        or (
-            experiment.spec.objective.type == "maximize"
-            and float(best_objective_metric.max) < float(experiment.spec.objective.goal)
-        )
-    ):
-        raise Exception(
-            "Experiment goal is reached, but metrics are incorrect. "
-            f"Experiment objective: {experiment.spec.objective}. "
-            f"Experiment best objective metric: {best_objective_metric}"
-        )
-
-    # Verify Suggestion's resources. Suggestion name = Experiment name.
-    suggestion = katib_client.get_suggestion(exp_name, exp_namespace)
-
-    # For the Never or FromVolume resume policies Suggestion must be Succeeded.
-    # For the LongRunning resume policy Suggestion must be always Running.
-    for c in suggestion.status.conditions:
-        if (
-            c.type == constants.EXPERIMENT_CONDITION_SUCCEEDED
-            and c.status == constants.CONDITION_STATUS_TRUE
-            and experiment.spec.resume_policy == "LongRunning"
-        ):
-            raise Exception(
-                f"Suggestion is Succeeded while Resume Policy is {experiment.spec.resume_policy}."
-                f"Suggestion conditions: {suggestion.status.conditions}"
-            )
-        elif (
-            c.type == constants.EXPERIMENT_CONDITION_RUNNING
-            and c.status == constants.CONDITION_STATUS_TRUE
-            and experiment.spec.resume_policy != "LongRunning"
-        ):
-            raise Exception(
-                f"Suggestion is Running while Resume Policy is {experiment.spec.resume_policy}."
-                f"Suggestion conditions: {suggestion.status.conditions}"
-            )
-
-    # For Never and FromVolume resume policies verify Suggestion's resources.
-    if (
-        experiment.spec.resume_policy == "Never"
-        or experiment.spec.resume_policy == "FromVolume"
-    ):
-        resource_name = exp_name + "-" + experiment.spec.algorithm.algorithm_name
-
-        # Suggestion's Service and Deployment should be deleted.
-        for i in range(10):
-            try:
-                client.AppsV1Api().read_namespaced_deployment(
-                    resource_name, exp_namespace
-                )
-            except client.ApiException as e:
-                if e.status == 404:
-                    break
-                else:
-                    raise e
-            # Deployment deletion might take some time.
-            time.sleep(1)
-        if i == 10:
-            raise Exception(
-                "Suggestion Deployment is still alive for Resume Policy: {}".format(
-                    experiment.spec.resume_policy
-                )
-            )
-
-        try:
-            client.CoreV1Api().read_namespaced_service(resource_name, exp_namespace)
-        except client.ApiException as e:
-            if e.status != 404:
-                raise e
-        else:
-            raise Exception(
-                "Suggestion Service is still alive for Resume Policy: {}".format(
-                    experiment.spec.resume_policy
-                )
-            )
-
-        # For FromVolume resume policy PVC should not be deleted.
-        if experiment.spec.resume_policy == "FromVolume":
-            try:
-                client.CoreV1Api().read_namespaced_persistent_volume_claim(
-                    resource_name, exp_namespace
-                )
-            except client.ApiException:
-                raise Exception("PVC is deleted for FromVolume Resume Policy")
-
-
 def run_e2e_experiment(
     katib_client: KatibClient,
     experiment: models.V1beta1Experiment,
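Note: the tune test added below relies on the default StdOut metrics
collector, so the objective function must print its metric as a
"<name>=<value>" line matching objective_metric_name. A minimal sketch of
the expected Trial stdout (one sampled point assumed; values illustrative):

    # Hypothetical Trial run for sampled parameters a=15, b=0.15.
    parameters = {"a": 15, "b": 0.15}
    result = 4 * int(parameters["a"]) - float(parameters["b"]) ** 2
    print(f"result={result}")  # prints "result=59.9775", collected as metric "result"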
diff --git a/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py b/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py
new file mode 100644
index 00000000000..1ca3596af95
--- /dev/null
+++ b/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py
@@ -0,0 +1,97 @@
+import argparse
+import logging
+
+from kubeflow.katib import KatibClient
+from kubeflow.katib import search
+from kubernetes import client
+from verify import verify_experiment_results
+
+# Experiment timeout is 40 min.
+EXPERIMENT_TIMEOUT = 60 * 40
+
+# The default logging config.
+logging.basicConfig(level=logging.INFO)
+
+
+def run_e2e_experiment_create_by_tune(
+    katib_client: KatibClient,
+    exp_name: str,
+    exp_namespace: str,
+):
+    # Create Katib Experiment and wait until it is finished.
+    logging.debug("Creating Experiment: {}/{}".format(exp_namespace, exp_name))
+
+    # Use the test case from the getting-started tutorial.
+    # https://www.kubeflow.org/docs/components/katib/getting-started/#getting-started-with-katib-python-sdk
+    # [1] Create an objective function.
+    def objective(parameters):
+        import time
+        time.sleep(5)
+        result = 4 * int(parameters["a"]) - float(parameters["b"]) ** 2
+        print(f"result={result}")
+
+    # [2] Create hyperparameter search space.
+    parameters = {
+        "a": search.int(min=10, max=20),
+        "b": search.double(min=0.1, max=0.2)
+    }
+
+    # [3] Create Katib Experiment with 4 Trials and 2 CPUs per Trial,
+    # and wait until the Experiment reaches the Succeeded condition.
+    katib_client.tune(
+        name=exp_name,
+        namespace=exp_namespace,
+        objective=objective,
+        parameters=parameters,
+        objective_metric_name="result",
+        max_trial_count=4,
+        resources_per_trial={"cpu": "2"},
+    )
+    experiment = katib_client.wait_for_experiment_condition(
+        exp_name, exp_namespace, timeout=EXPERIMENT_TIMEOUT
+    )
+
+    # Verify the Experiment results.
+    verify_experiment_results(katib_client, experiment, exp_name, exp_namespace)
+
+    # Print the Experiment and Suggestion.
+    logging.debug(katib_client.get_experiment(exp_name, exp_namespace))
+    logging.debug(katib_client.get_suggestion(exp_name, exp_namespace))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--namespace", type=str, required=True, help="Namespace for the Katib E2E test",
+    )
+    parser.add_argument(
+        "--verbose", action="store_true", help="Verbose output for the Katib E2E test",
+    )
+    args = parser.parse_args()
+
+    if args.verbose:
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    katib_client = KatibClient()
+
+    namespace_labels = client.CoreV1Api().read_namespace(args.namespace).metadata.labels
+    if 'katib.kubeflow.org/metrics-collector-injection' not in namespace_labels:
+        namespace_labels['katib.kubeflow.org/metrics-collector-injection'] = 'enabled'
+        client.CoreV1Api().patch_namespace(args.namespace, {'metadata': {'labels': namespace_labels}})
+
+    # Test with run_e2e_experiment_create_by_tune.
+    exp_name = "tune-example"
+    exp_namespace = args.namespace
+    try:
+        run_e2e_experiment_create_by_tune(katib_client, exp_name, exp_namespace)
+        logging.info("---------------------------------------------------------------")
+        logging.info(f"E2E is succeeded for Experiment created by tune: {exp_namespace}/{exp_name}")
+    except Exception as e:
+        logging.info("---------------------------------------------------------------")
+        logging.info(f"E2E is failed for Experiment created by tune: {exp_namespace}/{exp_name}")
+        raise e
+    finally:
+        # Delete the Experiment.
+        logging.info("---------------------------------------------------------------")
+        logging.info("---------------------------------------------------------------")
+        katib_client.delete_experiment(exp_name, exp_namespace)
diff --git a/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.sh b/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.sh
new file mode 100755
index 00000000000..1520d301439
--- /dev/null
+++ b/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+# Copyright 2024 The Kubeflow Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This shell script is used to run the e2e test for the Katib tune API.
+# It runs run-e2e-tune-api.py against the default namespace.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+cd "$(dirname "$0")"
+
+echo "Katib deployments"
+kubectl -n kubeflow get deploy
+echo "Katib services"
+kubectl -n kubeflow get svc
+echo "Katib pods"
+kubectl -n kubeflow get pod
+echo "Katib persistent volume claims"
+kubectl get pvc -n kubeflow
+echo "Available CRDs"
+kubectl get crd
+
+python run-e2e-tune-api.py --namespace default \
+--verbose || (kubectl get pods -n kubeflow && exit 1)
diff --git a/test/e2e/v1beta1/scripts/gh-actions/setup-minikube.sh b/test/e2e/v1beta1/scripts/gh-actions/setup-minikube.sh
index a24131bbb7d..b890a40d41b 100755
--- a/test/e2e/v1beta1/scripts/gh-actions/setup-minikube.sh
+++ b/test/e2e/v1beta1/scripts/gh-actions/setup-minikube.sh
@@ -22,8 +22,9 @@ set -o nounset
 cd "$(dirname "$0")"
 
 DEPLOY_KATIB_UI=${1:-false}
-TRIAL_IMAGES=${2:-""}
-EXPERIMENTS=${3:-""}
+TUNE_API=${2:-false}
+TRIAL_IMAGES=${3:-""}
+EXPERIMENTS=${4:-""}
 
 echo "Start to setup Minikube Kubernetes Cluster"
 kubectl version
@@ -31,4 +32,4 @@ kubectl cluster-info
 kubectl get nodes
 
 echo "Build and Load container images"
-./build-load.sh "$TRIAL_IMAGES" "$EXPERIMENTS" "$DEPLOY_KATIB_UI"
+./build-load.sh "$DEPLOY_KATIB_UI" "$TUNE_API" "$TRIAL_IMAGES" "$EXPERIMENTS"
" + f"Experiment objective: {experiment.spec.objective}. " + f"Experiment best objective metric: {best_objective_metric}" + ) + + # Verify Suggestion's resources. Suggestion name = Experiment name. + suggestion = katib_client.get_suggestion(exp_name, exp_namespace) + + # For the Never or FromVolume resume policies Suggestion must be Succeeded. + # For the LongRunning resume policy Suggestion must be always Running. + for c in suggestion.status.conditions: + if ( + c.type == constants.EXPERIMENT_CONDITION_SUCCEEDED + and c.status == constants.CONDITION_STATUS_TRUE + and experiment.spec.resume_policy == "LongRunning" + ): + raise Exception( + f"Suggestion is Succeeded while Resume Policy is {experiment.spec.resume_policy}." + f"Suggestion conditions: {suggestion.status.conditions}" + ) + elif ( + c.type == constants.EXPERIMENT_CONDITION_RUNNING + and c.status == constants.CONDITION_STATUS_TRUE + and experiment.spec.resume_policy != "LongRunning" + ): + raise Exception( + f"Suggestion is Running while Resume Policy is {experiment.spec.resume_policy}." + f"Suggestion conditions: {suggestion.status.conditions}" + ) + + # For Never and FromVolume resume policies verify Suggestion's resources. + if ( + experiment.spec.resume_policy == "Never" + or experiment.spec.resume_policy == "FromVolume" + ): + resource_name = exp_name + "-" + experiment.spec.algorithm.algorithm_name + + # Suggestion's Service and Deployment should be deleted. + for i in range(10): + try: + client.AppsV1Api().read_namespaced_deployment( + resource_name, exp_namespace + ) + except client.ApiException as e: + if e.status == 404: + break + else: + raise e + if i == 10: + raise Exception( + "Suggestion Deployment is still alive for Resume Policy: {}".format( + experiment.spec.resume_policy + ) + ) + + try: + client.CoreV1Api().read_namespaced_service(resource_name, exp_namespace) + except client.ApiException as e: + if e.status != 404: + raise e + else: + raise Exception( + "Suggestion Service is still alive for Resume Policy: {}".format( + experiment.spec.resume_policy + ) + ) + + # For FromVolume resume policy PVC should not be deleted. + if experiment.spec.resume_policy == "FromVolume": + try: + client.CoreV1Api().read_namespaced_persistent_volume_claim( + resource_name, exp_namespace + ) + except client.ApiException: + raise Exception("PVC is deleted for FromVolume Resume Policy")