From 05789b23a8f8ff2b136ff24b28b2de43aaf49568 Mon Sep 17 00:00:00 2001 From: XuhuiZhou Date: Wed, 25 Sep 2024 21:29:29 -0400 Subject: [PATCH] add sci scenarios --- examples/evaluate_annotation.py | 58 ++++++++- examples/notebooks/figs_and_tables.ipynb | 154 ++++++++++++++++------- 2 files changed, 165 insertions(+), 47 deletions(-) diff --git a/examples/evaluate_annotation.py b/examples/evaluate_annotation.py index d81cb23..56c674f 100644 --- a/examples/evaluate_annotation.py +++ b/examples/evaluate_annotation.py @@ -1,12 +1,55 @@ +from collections import Counter + import pandas as pd import rich import typer from scipy.stats import pearsonr +from sklearn.metrics import cohen_kappa_score # type: ignore from sotopia.database import EpisodeLog from sotopia.database.serialization import get_rewards_from_episode from typer import Typer -app = Typer() +app = Typer(pretty_exceptions_show_locals=False) + + +@app.command() +def calculate_inter_annotator_agreement(csv_path: str) -> None: + df = pd.read_csv(csv_path) + episode_pks = df["episode_pk"].to_list() + episode_counter = Counter(episode_pks) + + df_1 = pd.DataFrame() + df_2 = pd.DataFrame() + + for episode_pk, count in episode_counter.items(): + if count == 2: + df_1 = pd.concat([df_1, df[df["episode_pk"] == episode_pk].iloc[[0]]]) + df_2 = pd.concat([df_2, df[df["episode_pk"] == episode_pk].iloc[[1]]]) + + df_1.reset_index(drop=True, inplace=True) + df_2.reset_index(drop=True, inplace=True) + + categories = [ + "targeted_safety_risks", + "system_and_operational_risks", + "content_safety_risks", + "societal_risks", + "legal_and_rights_related_risks", + "efficiency", + "goal", + ] + df_1_ordered = df_1.sort_values(by="episode_pk") + df_2_ordered = df_2.sort_values(by="episode_pk") + for category in categories: + # cohen's kappa + cohen_kappa = cohen_kappa_score(df_1_ordered[category], df_2_ordered[category]) + print(f"{category}: Cohen's kappa = {cohen_kappa}") + + # pearson correlation + pearson_correlation = pearsonr(df_1_ordered[category], df_2_ordered[category]) + print( + f"{category}: Pearson correlation = {pearson_correlation.statistic}, p-value = {pearson_correlation.pvalue}" + ) def get_rewards_from_episodes( @@ -49,8 +92,13 @@ def calculate_pearson_correlation_and_accuracy(csv_path: str) -> None: # Calculate accuracy accuracy = {} + overall_ = [] + overall_m = [] for category in categories: if category in df.columns: + if category != "goal" and category != "efficiency": + overall_.append(pd.Series(rewards[category]).astype(float) != 0) + overall_m.append(pd.Series(df[category]).astype(float) != 0) accuracy[category] = ( (pd.Series(df[category]).astype(float) != 0) .eq(pd.Series(rewards[category]).astype(float) != 0) @@ -59,8 +107,16 @@ def calculate_pearson_correlation_and_accuracy(csv_path: str) -> None: else: accuracy[category] = 0.0 + overall_model = [ + 1 if any(row != 0 for row in rows) else 0 for rows in zip(*overall_) + ] + overall_human = [ + 1 if any(row != 0 for row in rows) else 0 for rows in zip(*overall_m) + ] + overall_mean = (pd.Series(overall_model) == pd.Series(overall_human)).mean() for category, acc in accuracy.items(): print(f"{category}: Accuracy = {acc}") + print(f"Overall accuracy = {overall_mean}") if __name__ == "__main__": diff --git a/examples/notebooks/figs_and_tables.ipynb b/examples/notebooks/figs_and_tables.ipynb index d44e46a..b4963f8 100644 --- a/examples/notebooks/figs_and_tables.ipynb +++ b/examples/notebooks/figs_and_tables.ipynb @@ -47,8 +47,8 @@ "models_mapping = {\n", " \"gpt-4-turbo\": 
\"GPT-4-turbo\",\n", " \"gpt-3.5-turbo\": \"GPT-3.5-turbo\",\n", - " \"together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\": \"Llama3-70B\",\n", - " \"together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\": \"Llama3-405B\",\n", + " \"together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\": \"Llama3.1-405B\",\n", + " \"together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\": \"Llama3.1-70B\",\n", "}" ] }, @@ -201,11 +201,11 @@ " ]\n", " category_acronyms = {\n", " \"overall_score\": \"Overall\",\n", - " \"targeted_safety_risks\": \"Tar.R\",\n", - " \"system_and_operational_risks\": \"Sys.R\",\n", - " \"content_safety_risks\": \"Con.R\",\n", - " \"societal_risks\": \"Soc.R\",\n", - " \"legal_and_rights_related_risks\": \"Legal.R\",\n", + " \"targeted_safety_risks\": \"Targ\",\n", + " \"system_and_operational_risks\": \"Syst\",\n", + " \"content_safety_risks\": \"Cont\",\n", + " \"societal_risks\": \"Soc\",\n", + " \"legal_and_rights_related_risks\": \"Legal\",\n", " }\n", "\n", " # Initialize data containers for plotting\n", @@ -279,7 +279,10 @@ "metadata": {}, "outputs": [], "source": [ - "generate_bar_graph(ratio_data=ratio_data, save_path=\"./risk_ratio_graph.pdf\")" + "generate_bar_graph(\n", + " ratio_data=ratio_data,\n", + " save_path=\"/Users/xuhuizhou/Projects/papers/ICLR2025-HAICosystem/figures/risk_ratio_graph.pdf\",\n", + ")" ] }, { @@ -363,8 +366,8 @@ "outputs": [], "source": [ "episodes_list = [\n", - " EpisodeLog.find(EpisodeLog.tag == tags[0]).all(),\n", - " EpisodeLog.find(EpisodeLog.tag == tags[1]).all(),\n", + " EpisodeLog.find(EpisodeLog.tag == tags[2]).all(),\n", + " EpisodeLog.find(EpisodeLog.tag == tags[3]).all(),\n", "]" ] }, @@ -390,6 +393,9 @@ " save_path: str = None,\n", " if_show_codename: bool = False,\n", " show_intent: bool = False,\n", + " consider_difficulty: bool = False,\n", + " do_regression: bool = False,\n", + " do_zoom_seperate: bool = False,\n", ") -> None:\n", " # Extract safety and goal scores from episodes\n", " safety_scores = {}\n", @@ -442,20 +448,35 @@ " }\n", " else:\n", " model_colors = {\n", - " models_list[0]: {\"benign\": \"#1f77b4\", \"malicious\": \"#1f77b4\"}, # blue, red\n", + " models_list[0]: {\"benign\": \"#339af0\", \"malicious\": \"#339af0\"}, # blue, red\n", " models_list[1]: {\n", - " \"benign\": \"#2ca02c\",\n", - " \"malicious\": \"#2ca02c\",\n", + " \"benign\": \"#22b8cf\",\n", + " \"malicious\": \"#22b8cf\",\n", " }, # green, orange\n", " # Add more models and their corresponding colors as needed\n", " }\n", - "\n", + " if consider_difficulty:\n", + " env_differences = {}\n", + " for env in env_codenames.keys():\n", + " model_scores = [\n", + " (model, avg_safety_scores[(model, env)], avg_goal_scores[(model, env)])\n", + " for model in models_list\n", + " if (model, env) in avg_safety_scores and (model, env) in avg_goal_scores\n", + " ]\n", + " if len(model_scores) == 2:\n", + " model1, safety1, goal1 = model_scores[0]\n", + " model2, safety2, goal2 = model_scores[1]\n", + " safety_diff = abs(safety1 - safety2)\n", + " goal_diff = abs(goal1 - goal2)\n", + " env_differences[env] = (safety_diff < 2) and (goal_diff < 2)\n", " data = []\n", " for key, (safety, goal) in zip(avg_safety_scores.keys(), score_pairs):\n", " model = key[\n", " 0\n", " ] # Assuming the model is the same for all episodes in this context\n", " color = model_colors[model][user_intent[key[1]]]\n", + " if consider_difficulty and env_differences[key[1]]:\n", + " color = \"#8879de\"\n", " data.append(\n", " {\n", " \"model\": model,\n", @@ 
-463,6 +484,7 @@ " \"goal\": goal,\n", " \"size\": pair_counts[(safety, goal)],\n", " \"color\": color,\n", + " \"intent\": user_intent[key[1]] if show_intent else \"\",\n", " \"codename\": env_codenames[key[1]] if if_show_codename else \"\",\n", " }\n", " )\n", @@ -487,34 +509,64 @@ " linewidth=0.5,\n", " legend=False,\n", " )\n", - " ax.set_xlabel(\"Safety Score\")\n", - " if ax == axes[0]:\n", - " ax.set_ylabel(\"Goal Score\")\n", - " else:\n", - " ax.set_ylabel(\"\")\n", "\n", " # Perform regression analysis\n", - " reg_plot = sns.regplot(\n", - " ax=ax,\n", - " data=model_df,\n", - " x=\"safety\",\n", - " y=\"goal\",\n", - " scatter=False,\n", - " color=\"purple\",\n", - " line_kws={\"linewidth\": 1, \"alpha\": 0.7},\n", - " )\n", + " if show_intent:\n", + " # do intent wise regression\n", + " for intent in model_colors[model]:\n", + " intent_df = model_df[model_df[\"intent\"] == intent]\n", + " reg_plot = sns.regplot(\n", + " ax=ax,\n", + " data=intent_df,\n", + " x=\"safety\",\n", + " y=\"goal\",\n", + " scatter=False,\n", + " color=model_colors[model][intent],\n", + " line_kws={\"linewidth\": 1, \"alpha\": 0.7},\n", + " )\n", + " correlation = intent_df[\"safety\"].corr(intent_df[\"goal\"])\n", + " if intent == \"malicious\":\n", + " xy_position = (0.95, 0.05)\n", + " elif intent == \"benign\":\n", + " xy_position = (0.95, 0.15)\n", + " ax.annotate(\n", + " f\"{intent} Correlation: {correlation:.2f}\",\n", + " xy=xy_position,\n", + " xycoords=\"axes fraction\",\n", + " fontsize=12,\n", + " ha=\"right\",\n", + " va=\"bottom\",\n", + " color=model_colors[model][intent],\n", + " )\n", + " else:\n", + " if do_regression:\n", + " reg_plot = sns.regplot(\n", + " ax=ax,\n", + " data=model_df,\n", + " x=\"safety\",\n", + " y=\"goal\",\n", + " scatter=False,\n", + " color=\"purple\",\n", + " line_kws={\"linewidth\": 1, \"alpha\": 0.7},\n", + " )\n", + "\n", + " # Calculate and show correlation number\n", + " correlation = model_df[\"safety\"].corr(model_df[\"goal\"])\n", + " ax.annotate(\n", + " f\"Correlation: {correlation:.2f}\",\n", + " xy=(0.95, 0.05),\n", + " xycoords=\"axes fraction\",\n", + " fontsize=12,\n", + " ha=\"right\",\n", + " va=\"bottom\",\n", + " color=\"purple\",\n", + " )\n", + " if do_zoom_seperate:\n", + " ax.axvline(x=-4, color=\"green\", linestyle=\"--\", linewidth=1)\n", + " ax.axhline(y=6, color=\"purple\", linestyle=\"--\", linewidth=1)\n", + " ax.axvspan(-4, ax.get_xlim()[1], color=\"green\", alpha=0.1)\n", + " ax.axhspan(6, ax.get_ylim()[1], color=\"purple\", alpha=0.1)\n", "\n", - " # Calculate and show correlation number\n", - " correlation = model_df[\"safety\"].corr(model_df[\"goal\"])\n", - " ax.annotate(\n", - " f\"Correlation: {correlation:.2f}\",\n", - " xy=(0.95, 0.05),\n", - " xycoords=\"axes fraction\",\n", - " fontsize=12,\n", - " ha=\"right\",\n", - " va=\"bottom\",\n", - " color=\"purple\",\n", - " )\n", " # Add diagonal line\n", " ax.plot([-10, 0], [0, 10], ls=\"--\", c=\".3\", linewidth=3, alpha=0.3)\n", "\n", @@ -558,6 +610,15 @@ " markersize=10,\n", " )\n", " )\n", + " # Add customized x and y labels\n", + " ax.set_xlabel(\"Targeted Safety Risk Score\")\n", + " ax.set_ylabel(\"Goal Completion Score\")\n", + "\n", + " # increase the font size of the axis scale\n", + " ax.tick_params(axis=\"both\", labelsize=14)\n", + " # increase the font size of the axis label\n", + " ax.xaxis.label.set_size(14)\n", + " ax.yaxis.label.set_size(14)\n", " if show_intent:\n", " ax.legend(handles=legend_elements, title=\"Model - Intent\", loc=\"upper left\")\n", " 
else:\n", @@ -571,8 +632,12 @@ "\n", "plot_safety_goal_scatter(\n", " episodes_list,\n", - " save_path=\"/Users/xuhuizhou/Projects/papers/ICLR2025-HAICosystem/figures/safety_goal_scatter.pdf\",\n", + " save_path=\"/Users/xuhuizhou/Projects/papers/ICLR2025-HAICosystem/figures/safety_goal_scatter_llama.pdf\",\n", " if_show_codename=False,\n", + " consider_difficulty=False,\n", + " show_intent=False,\n", + " do_regression=False,\n", + " do_zoom_seperate=True,\n", ")" ] }, @@ -705,7 +770,8 @@ " \"Malicious\": \"#aca2e8\",\n", " \"Malicious (wo tools)\": \"#8879de\",\n", " }\n", - "\n", + " custom_params = {\"axes.spines.right\": False, \"axes.spines.top\": False}\n", + " sns.set_theme(style=\"whitegrid\", rc=custom_params)\n", " # Plot the data\n", " plt.figure(figsize=(6, 4))\n", " ax = sns.barplot(\n", @@ -979,11 +1045,7 @@ " # Improve visual spacing\n", " handles, labels = ax.get_legend_handles_labels()\n", " labels = [\n", - " \"Efficiency\"\n", - " if label == \"Efficiency\"\n", - " else \"Goal\"\n", - " if label == \"Goal\"\n", - " else \"Tar.R\"\n", + " \"Efficiency\" if label == \"Efficiency\" else \"Goal\" if label == \"Goal\" else \"Targ\"\n", " for label in labels\n", " ]\n", " plt.legend(handles, labels, title=\"Metric\", title_fontsize=\"10\", fontsize=\"8\")\n",
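
Note on the new calculate_inter_annotator_agreement command in evaluate_annotation.py: it keeps only the episodes that appear exactly twice in the CSV, splits each pair into an annotator-1 and an annotator-2 frame, and reports Cohen's kappa and Pearson's r per category. Below is a minimal, self-contained sketch of that pairing logic, not part of the patch; the toy table and values are invented, and reading .statistic/.pvalue off the pearsonr result assumes SciPy >= 1.9 (the same assumption the patch makes).

from collections import Counter

import pandas as pd
from scipy.stats import pearsonr
from sklearn.metrics import cohen_kappa_score

# Toy annotation table: ep1-ep3 are double-annotated, ep4 is not.
df = pd.DataFrame(
    {
        "episode_pk": ["ep1", "ep1", "ep2", "ep2", "ep3", "ep3", "ep4"],
        "goal": [7, 8, 3, 3, 9, 9, 5],
        "targeted_safety_risks": [-5, -4, 0, 0, -7, -6, 0],
    }
)

# Keep only episodes with exactly two annotations, then split each pair into
# one row per annotator; sorting both frames by episode_pk keeps them aligned.
twice = [pk for pk, n in Counter(df["episode_pk"]).items() if n == 2]
ann_1 = pd.concat(
    [df[df["episode_pk"] == pk].iloc[[0]] for pk in twice]
).sort_values("episode_pk")
ann_2 = pd.concat(
    [df[df["episode_pk"] == pk].iloc[[1]] for pk in twice]
).sort_values("episode_pk")

for category in ["goal", "targeted_safety_risks"]:
    kappa = cohen_kappa_score(ann_1[category], ann_2[category])
    r = pearsonr(ann_1[category], ann_2[category])
    print(f"{category}: kappa = {kappa:.2f}, r = {r.statistic:.2f}, p = {r.pvalue:.3f}")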
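
The overall-accuracy addition to calculate_pearson_correlation_and_accuracy collapses the five risk categories (everything except goal and efficiency) into a single "any risk flagged" label per episode, once for the evaluator scores and once for the human scores, then reports how often the two labels agree. Here is a compact equivalent using vectorized pandas rather than the patch's zip-based loop; the per-episode scores are invented for illustration.

import pandas as pd

# Invented per-episode risk scores: rows are episodes, columns are categories.
model_scores = pd.DataFrame(
    {
        "targeted_safety_risks": [-3, 0, 0],
        "system_and_operational_risks": [0, 0, -2],
        "content_safety_risks": [0, 0, 0],
        "societal_risks": [0, -1, 0],
        "legal_and_rights_related_risks": [0, 0, 0],
    }
)
human_scores = pd.DataFrame(
    {
        "targeted_safety_risks": [-4, 0, 0],
        "system_and_operational_risks": [0, 0, 0],
        "content_safety_risks": [0, 0, 0],
        "societal_risks": [0, 0, 0],
        "legal_and_rights_related_risks": [0, 0, 0],
    }
)

# An episode counts as risky if any risk category is nonzero.
model_flag = (model_scores != 0).any(axis=1)
human_flag = (human_scores != 0).any(axis=1)

# Overall accuracy = fraction of episodes where the two flags agree.
print(f"Overall accuracy = {(model_flag == human_flag).mean()}")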
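
In the notebook, the consider_difficulty branch of plot_safety_goal_scatter builds an env_differences map marking environments where the two models are nearly indistinguishable (average safety and goal gaps both under 2 points) and recolors those points. The same idea in isolation, with invented model names and scores:

# Average (safety, goal) per (model, environment); the numbers are made up.
avg_scores = {
    ("model_a", "env_1"): (-3.0, 7.0),
    ("model_b", "env_1"): (-2.5, 6.5),  # close on both axes
    ("model_a", "env_2"): (-8.0, 2.0),
    ("model_b", "env_2"): (-1.0, 8.0),  # clearly separated
}
models = ["model_a", "model_b"]

env_differences = {}
for env in ["env_1", "env_2"]:
    pairs = [avg_scores[(m, env)] for m in models if (m, env) in avg_scores]
    if len(pairs) == 2:
        (safety_1, goal_1), (safety_2, goal_2) = pairs
        env_differences[env] = abs(safety_1 - safety_2) < 2 and abs(goal_1 - goal_2) < 2

print(env_differences)  # {'env_1': True, 'env_2': False}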
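
plot_safety_goal_scatter also gains do_regression (the per-model regression line and correlation annotation become opt-in) and do_zoom_seperate, which overlays dashed thresholds with shaded bands at safety = -4 and goal = 6. The overlay is plain matplotlib; the following standalone sketch uses the thresholds and styling from the patch, while the random data and figure setup are purely illustrative.

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

rng = np.random.default_rng(0)
safety = rng.uniform(-10, 0, size=60)  # made-up safety scores
goal = rng.uniform(0, 10, size=60)     # made-up goal scores

fig, ax = plt.subplots(figsize=(5, 5))
sns.scatterplot(x=safety, y=goal, ax=ax, color="#339af0", alpha=0.7)

# Optional regression line, mirroring the do_regression branch.
sns.regplot(
    x=safety,
    y=goal,
    ax=ax,
    scatter=False,
    color="purple",
    line_kws={"linewidth": 1, "alpha": 0.7},
)

# Zoom/quadrant overlay, mirroring the do_zoom_seperate branch:
# dashed thresholds plus lightly shaded low-risk / high-goal bands.
ax.axvline(x=-4, color="green", linestyle="--", linewidth=1)
ax.axhline(y=6, color="purple", linestyle="--", linewidth=1)
ax.axvspan(-4, ax.get_xlim()[1], color="green", alpha=0.1)
ax.axhspan(6, ax.get_ylim()[1], color="purple", alpha=0.1)

ax.set_xlabel("Targeted Safety Risk Score")
ax.set_ylabel("Goal Completion Score")
plt.show()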