plotting update

lasgroup · May 7, 2024 · 2ebec97 · 2ebec97
1 parent 354c962
commit 2ebec97
Show file tree

Hide file tree

Showing 6 changed files with 249 additions and 30 deletions.
diff --git a/experiments/bounded_switches/analysis/save_data.py b/experiments/bounded_switches/analysis/save_data.py
@@ -37,4 +37,4 @@
 print(df.head())  # Display the first few rows of the DataFrame
 
 # You can now save this DataFrame to a CSV file or perform further analysis
-df.to_csv("data/greenhouse_bounded_switches.csv", index=False)
+df.to_csv("data/greenhouse_temperature_tracking.csv", index=False)
diff --git a/experiments/noise_influence/analysis/make_plots.py b/experiments/noise_influence/analysis/make_plots.py
@@ -30,7 +30,7 @@ class Statistics(NamedTuple):
     xs: np.ndarray
     ys_mean: np.ndarray
     ys_std: np.ndarray
-    color: str = "Blue"
+    color: str = "C0"
     linestyle: str = "--"
     linewidth: float = LINE_WIDTH
 
@@ -39,10 +39,11 @@ class Statistics(NamedTuple):
 data['results/reward_with_switch_cost'] = data['results/total_reward'] - data['switch_cost'] * data[
     'results/num_actions']
 
-fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(16, 8))
-SWITCH_COST = 1.0
+fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(16, 5))
+SWITCH_COST = 0.1
 
 envs = ['Pendulum', 'Greenhouse']
+envs_labels = ['Pendulum Swing-up \n [Duration=10s]', 'Greenhouse Temperature Tracking \n [Duration=25h]', ]
 
 for index, env_name in enumerate(envs):
     cur_data = data[(data['switch_cost'] == SWITCH_COST) &
@@ -73,7 +74,7 @@ class Statistics(NamedTuple):
         xs=np.array(cur_data_reward_without_switch_cost['scale']),
         ys_mean=np.array(cur_data_reward_without_switch_cost['mean']),
         ys_std=np.array(cur_data_reward_without_switch_cost['std']),
-        color='Blue',
+        color='C0',
         linestyle='-'
     )
 
@@ -82,7 +83,7 @@ class Statistics(NamedTuple):
         xs=np.array(cur_data_reward_with_switch_cost['scale']),
         ys_mean=np.array(cur_data_reward_with_switch_cost['mean']),
         ys_std=np.array(cur_data_reward_with_switch_cost['std']),
-        color='Blue',
+        color='C0',
         linestyle='dashed'
     )
 
@@ -102,7 +103,7 @@ class Statistics(NamedTuple):
         ax[index].tick_params(axis='y', labelcolor=baseline_stat.color)
         ax[index].set_ylabel('Reward', fontsize=LABEL_FONT_SIZE, color=baseline_stat.color)
 
-    ax[index].set_title(f'{env_name}', fontsize=LABEL_FONT_SIZE)
+    ax[index].set_title(envs_labels[index], fontsize=LABEL_FONT_SIZE, pad=60)
     ax[index].set_xlabel(r'Noise scale', fontsize=LABEL_FONT_SIZE)
 
     ax_right_side = ax[index].twinx()
@@ -134,13 +135,13 @@ class Statistics(NamedTuple):
 fig.legend(by_label.values(), by_label.keys(),
            ncols=3,
            loc='upper center',
-           bbox_to_anchor=(0.5, 0.89),
+           bbox_to_anchor=(0.5, 0.85),
            fontsize=LEGEND_FONT_SIZE,
            frameon=True)
-
-fig.suptitle(f'Noise influence',
-             fontsize=TITLE_FONT_SIZE,
-             y=0.95)
-fig.tight_layout(rect=[0.0, 0.0, 1, 0.86])
+#
+# fig.suptitle(f'Noise influence',
+#              fontsize=TITLE_FONT_SIZE,
+#              y=0.95)
+fig.tight_layout(rect=[0.0, 0.0, 1, 1])
 plt.savefig('noise_influence.pdf')
 plt.show()
diff --git a/experiments/switch_cost_performance/analysis/make_plots.py b/experiments/switch_cost_performance/analysis/make_plots.py
@@ -5,7 +5,7 @@
 from scipy.ndimage import gaussian_filter1d
 from typing import NamedTuple, Dict
 
-LEGEND_FONT_SIZE = 22
+LEGEND_FONT_SIZE = 26
 TITLE_FONT_SIZE = 34
 TABLE_FONT_SIZE = 20
 LABEL_FONT_SIZE = 26
@@ -39,9 +39,10 @@ class Statistics(NamedTuple):
     linewidth: float = LINE_WIDTH
 
 
-fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(16, 8))
+fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(16, 5))
 
-envs = ['Pendulum', 'Greenhouse']
+envs_labels = ['Pendulum Swing-up \n [Duration=10s]', 'Greenhouse Temperature Tracking \n [Duration=25h]', ]
+envs = ['Pendulum', 'Greenhouse', ]
 
 for index, env_name in enumerate(envs):
     cur_data = data[data['env_name'] == env_name]
@@ -71,7 +72,7 @@ class Statistics(NamedTuple):
         xs=np.array(cur_data_reward_without_switch_cost['switch_cost']),
         ys_mean=np.array(cur_data_reward_without_switch_cost['mean']),
         ys_std=np.array(cur_data_reward_without_switch_cost['std']),
-        color='Blue',
+        color='C0',
         linestyle='-'
     )
 
@@ -80,7 +81,7 @@ class Statistics(NamedTuple):
         xs=np.array(cur_data_reward_with_switch_cost['switch_cost']),
         ys_mean=np.array(cur_data_reward_with_switch_cost['mean']),
         ys_std=np.array(cur_data_reward_with_switch_cost['std']),
-        color='Blue',
+        color='C0',
         linestyle='dashed'
     )
 
@@ -100,10 +101,9 @@ class Statistics(NamedTuple):
         ax[index].tick_params(axis='y', labelcolor=baseline_stat.color)
         ax[index].set_ylabel('Reward', fontsize=LABEL_FONT_SIZE, color=baseline_stat.color)
 
-    ax[index].set_title(f'{env_name}', fontsize=LABEL_FONT_SIZE)
+    ax[index].set_title(envs_labels[index], fontsize=LABEL_FONT_SIZE, pad=60)
     ax[index].set_xlabel(r'Switch Cost', fontsize=LABEL_FONT_SIZE)
 
-
     ax_right_side = ax[index].twinx()
     for baseline_name, baseline_stat in baseline_num_actions.items():
         ax_right_side.plot(baseline_stat.xs, baseline_stat.ys_mean,
@@ -133,13 +133,13 @@ class Statistics(NamedTuple):
 fig.legend(by_label.values(), by_label.keys(),
            ncols=3,
            loc='upper center',
-           bbox_to_anchor=(0.5, 0.89),
+           bbox_to_anchor=(0.5, 0.85),
            fontsize=LEGEND_FONT_SIZE,
            frameon=True)
 
-fig.suptitle(f'Switch cost influence',
-             fontsize=TITLE_FONT_SIZE,
-             y=0.95)
-fig.tight_layout(rect=[0.0, 0.0, 1, 0.86])
+# fig.suptitle(f'Switch cost influence',
+#              fontsize=TITLE_FONT_SIZE,
+#              y=0.95)
+fig.tight_layout(rect=[0.0, 0.0, 1, 1])
 plt.savefig('switch_cost_influence.pdf')
 plt.show()
diff --git a/playground/pendulum_ih_switch_cost.py b/playground/pendulum_ih_switch_cost.py
@@ -31,17 +31,17 @@
         env = PendulumEnvSwingDown(reward_source='dm-control')
 
     min_time_between_switches = 1 * env.dt
-    max_time_between_switches = 30 * env.dt
+    max_time_between_switches = 50 * env.dt
 
     discount_factor = 0.99
     continuous_discounting = discrete_to_continuous_discounting(discrete_discounting=discount_factor,
                                                                 dt=env.dt)
     if wrapper:
         env = IHSwitchCostWrapper(env,
                                   num_integrator_steps=episode_length,
-                                  min_time_between_switches=1 * env.dt,
-                                  max_time_between_switches=30 * env.dt,
-                                  switch_cost=ConstantSwitchCost(value=jnp.array(0.1)),
+                                  min_time_between_switches=min_time_between_switches,
+                                  max_time_between_switches=max_time_between_switches,
+                                  switch_cost=ConstantSwitchCost(value=jnp.array(1.0)),
                                   time_as_part_of_state=time_as_part_of_state,
                                   discounting=discount_factor)
 
@@ -53,7 +53,7 @@
     optimizer = SAC(
         target_entropy=None,
         environment=env,
-        num_timesteps=20_000,
+        num_timesteps=100_000,
         episode_length=episode_length,
         action_repeat=action_repeat,
         num_env_steps_between_updates=num_env_steps_between_updates,

diff --git a/plotting/__init__.py b/plotting/__init__.py
diff --git a/plotting/reward_vs_number_of_applied_actions/make_plot.py b/plotting/reward_vs_number_of_applied_actions/make_plot.py
@@ -0,0 +1,218 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+import matplotlib as mpl
+from typing import NamedTuple, Tuple
+
+LEGEND_FONT_SIZE = 26
+TITLE_FONT_SIZE = 26
+TABLE_FONT_SIZE = 20
+LABEL_FONT_SIZE = 26
+TICKS_SIZE = 24
+OBSERVATION_SIZE = 300
+
+EPISODE_LEN = 300
+NUMBER_OF_SAMPLES = 5
+LINE_WIDTH = 5
+
+plt.rc('text', usetex=True)
+plt.rc('text.latex', preamble=
+r'\usepackage{amsmath}'
+r'\usepackage{bm}'
+r'\def\vx{{\bm{x}}}'
+r'\def\vf{{\bm{f}}}')
+
+mpl.rcParams['xtick.labelsize'] = TICKS_SIZE
+mpl.rcParams['ytick.labelsize'] = TICKS_SIZE
+
+
+class Statistics(NamedTuple):
+    xs: np.ndarray
+    ys_mean: np.ndarray
+    ys_std: np.ndarray
+    title: str
+    ax_lim: Tuple[int, int]
+
+
+stats_optimized = []
+stats_equidistant = []
+
+############################# Greenhouse tracking #############################
+###############################################################################
+
+data = pd.read_csv('data/greenhouse_temperature_tracking.csv')
+data_bounded_switches = data.loc[data['wrapper'] == True]
+data_repeated_actions = data.loc[data['wrapper'] == False]
+
+data_repeated_actions.loc[:, 'actual_switches'] = EPISODE_LEN / data_repeated_actions['action_repeat']
+
+# Prepare data for data_bounded_switches
+grouped_bounded_switches = data_bounded_switches.groupby('num_switches')['results/total_reward'].agg(['mean', 'std'])
+grouped_bounded_switches = grouped_bounded_switches.reset_index()
+
+xs_bounded_switches = np.array(grouped_bounded_switches['num_switches'])
+ys_bounded_switches_mean = np.array(grouped_bounded_switches['mean'])
+ys_bounded_switches_std = np.array(grouped_bounded_switches['std'])
+
+# Prepare data for data_repeated_actions
+grouped_repeated_actions = data_repeated_actions.groupby('actual_switches')['results/total_reward'].agg(
+    ['mean', 'std'])
+grouped_repeated_actions = grouped_repeated_actions.reset_index()
+
+xs_repeated_actions = np.array(grouped_repeated_actions['actual_switches'])
+ys_repeated_actions_mean = np.array(grouped_repeated_actions['mean'])
+ys_repeated_actions_std = np.array(grouped_repeated_actions['std'])
+
+greenhouse_stats_optimized = Statistics(
+    xs=xs_bounded_switches,
+    ys_mean=ys_bounded_switches_mean,
+    ys_std=ys_bounded_switches_std,
+    title='Greenhouse Temperature Tracking \n [Duration=25h]',
+    ax_lim=(0, 35)
+)
+greenhouse_stats_equidistant = Statistics(
+    xs=xs_repeated_actions,
+    ys_mean=ys_repeated_actions_mean,
+    ys_std=ys_repeated_actions_std,
+    title='Greenhouse Temperature Tracking \n [Duration=25h]',
+    ax_lim=(0, 35)
+)
+
+stats_optimized.append(greenhouse_stats_optimized)
+stats_equidistant.append(greenhouse_stats_equidistant)
+
+############################# Pendulum Swing Up #############################
+###############################################################################
+
+data = pd.read_csv('data/pendulum_swing_up.csv')
+data_bounded_switches = data.loc[data['wrapper'] == True]
+data_repeated_actions = data.loc[data['wrapper'] == False]
+
+# Prepare data for data_bounded_switches
+grouped_bounded_switches = data_bounded_switches.groupby('num_switches')['results/total_reward'].agg(['mean', 'std'])
+grouped_bounded_switches = grouped_bounded_switches.reset_index()
+
+xs_bounded_switches = np.array(grouped_bounded_switches['num_switches'])
+ys_bounded_switches_mean = np.array(grouped_bounded_switches['mean'])
+ys_bounded_switches_std = np.array(grouped_bounded_switches['std'])
+
+# Prepare data for data_repeated_actions
+grouped_repeated_actions = data_repeated_actions.groupby('results/num_actions')['results/total_reward'].agg(
+    ['mean', 'std'])
+grouped_repeated_actions = grouped_repeated_actions.reset_index()
+
+xs_repeated_actions = np.array(grouped_repeated_actions['results/num_actions'])
+ys_repeated_actions_mean = np.array(grouped_repeated_actions['mean'])
+ys_repeated_actions_std = np.array(grouped_repeated_actions['std'])
+
+stat_optimized = Statistics(
+    xs=xs_bounded_switches,
+    ys_mean=ys_bounded_switches_mean,
+    ys_std=ys_bounded_switches_std,
+    title='Pendulum Swing-up \n [Duration=10seconds]',
+    ax_lim=(8, 35)
+)
+stat_equidistant = Statistics(
+    xs=xs_repeated_actions,
+    ys_mean=ys_repeated_actions_mean,
+    ys_std=ys_repeated_actions_std,
+    title='Pendulum Swing-up \n [Duration=10seconds]',
+    ax_lim=(8, 35)
+)
+
+stats_optimized.append(stat_optimized)
+stats_equidistant.append(stat_equidistant)
+
+############################# Pendulum Swing Down #############################
+###############################################################################
+
+data = pd.read_csv('data/pendulum_swing_down.csv')
+data_bounded_switches = data.loc[data['wrapper'] == True]
+data_repeated_actions = data.loc[data['wrapper'] == False]
+
+# Prepare data for data_bounded_switches
+grouped_bounded_switches = data_bounded_switches.groupby('num_switches')['results/total_reward'].agg(['mean', 'std'])
+grouped_bounded_switches = grouped_bounded_switches.reset_index()
+
+xs_bounded_switches = np.array(grouped_bounded_switches['num_switches'])
+ys_bounded_switches_mean = np.array(grouped_bounded_switches['mean'])
+ys_bounded_switches_std = np.array(grouped_bounded_switches['std'])
+
+# Prepare data for data_repeated_actions
+grouped_repeated_actions = data_repeated_actions.groupby('results/num_actions')['results/total_reward'].agg(
+    ['mean', 'std'])
+grouped_repeated_actions = grouped_repeated_actions.reset_index()
+
+xs_repeated_actions = np.array(grouped_repeated_actions['results/num_actions'])
+ys_repeated_actions_mean = np.array(grouped_repeated_actions['mean'])
+ys_repeated_actions_std = np.array(grouped_repeated_actions['std'])
+
+
+stat_optimized = Statistics(
+    xs=xs_bounded_switches,
+    ys_mean=ys_bounded_switches_mean,
+    ys_std=ys_bounded_switches_std,
+    title='Pendulum Swing-down \n [Duration=15seconds]',
+    ax_lim=(3, 35)
+)
+stat_equidistant = Statistics(
+    xs=xs_repeated_actions,
+    ys_mean=ys_repeated_actions_mean,
+    ys_std=ys_repeated_actions_std,
+    title='Pendulum Swing-down \n [Duration=15seconds]',
+    ax_lim=(3, 35)
+)
+
+stats_optimized.append(stat_optimized)
+stats_equidistant.append(stat_equidistant)
+
+# Plotting
+fig, axs = plt.subplots(nrows=1, ncols=len(stats_optimized), figsize=(20, 5))
+axs = np.array(axs).reshape(len(stats_optimized), )
+
+for index, (stat_opt, stat_equi) in enumerate(zip(stats_optimized, stats_equidistant)):
+    axs[index].plot(stat_opt.xs,
+                    stat_opt.ys_mean,
+                    label='Optimized time between actions',
+                    linewidth=LINE_WIDTH)
+    axs[index].fill_between(stat_opt.xs,
+                            stat_opt.ys_mean - 2 * stat_opt.ys_std / np.sqrt(NUMBER_OF_SAMPLES),
+                            stat_opt.ys_mean + 2 * stat_opt.ys_std / np.sqrt(NUMBER_OF_SAMPLES),
+                            alpha=0.3)
+
+    axs[index].plot(stat_equi.xs, stat_equi.ys_mean,
+                    label='Equidistant time between actions',
+                    linewidth=LINE_WIDTH,
+                    linestyle='dashed'
+                    )
+    axs[index].fill_between(stat_equi.xs,
+                            stat_equi.ys_mean - 2 * stat_equi.ys_std / np.sqrt(NUMBER_OF_SAMPLES),
+                            stat_equi.ys_mean + 2 * stat_equi.ys_std / np.sqrt(NUMBER_OF_SAMPLES),
+                            alpha=0.3)
+
+    axs[index].set_xlim(*stat_equi.ax_lim)
+    axs[index].set_xlabel('Number of applied actions', fontsize=LABEL_FONT_SIZE)
+    if index == 0:
+        axs[index].set_ylabel('Total reward', fontsize=LABEL_FONT_SIZE)
+    axs[index].set_title(stat_opt.title, fontsize=TITLE_FONT_SIZE, pad=60)
+
+
+handles, labels = [], []
+for ax in axs:
+    for handle, label in zip(*ax.get_legend_handles_labels()):
+        handles.append(handle)
+        labels.append(label)
+
+by_label = dict(zip(labels, handles))
+
+fig.legend(by_label.values(), by_label.keys(),
+           ncols=3,
+           loc='upper center',
+           bbox_to_anchor=(0.5, 0.85),
+           fontsize=LEGEND_FONT_SIZE,
+           frameon=True)
+
+fig.tight_layout(rect=[0.0, 0.0, 1, 1])
+
+plt.savefig('reward_vs_number_of_actions.pdf')
+plt.show()