From 05789b23a8f8ff2b136ff24b28b2de43aaf49568 Mon Sep 17 00:00:00 2001 From: XuhuiZhou Date: Wed, 25 Sep 2024 21:29:29 -0400 Subject: [PATCH] add sci scenarios --- examples/evaluate_annotation.py | 58 ++++++++- examples/notebooks/figs_and_tables.ipynb | 154 ++++++++++++++++------- 2 files changed, 165 insertions(+), 47 deletions(-) diff --git a/examples/evaluate_annotation.py b/examples/evaluate_annotation.py index d81cb23..56c674f 100644 --- a/examples/evaluate_annotation.py +++ b/examples/evaluate_annotation.py @@ -1,12 +1,55 @@ +from collections import Counter + import pandas as pd import rich import typer from scipy.stats import pearsonr +from sklearn.metrics import cohen_kappa_score # type: ignore from sotopia.database import EpisodeLog from sotopia.database.serialization import get_rewards_from_episode from typer import Typer -app = Typer() +app = Typer(pretty_exceptions_show_locals=False) + + +@app.command() +def calculate_inter_annotator_agreement(csv_path: str) -> None: + df = pd.read_csv(csv_path) + episode_pks = df["episode_pk"].to_list() + episode_counter = Counter(episode_pks) + + df_1 = pd.DataFrame() + df_2 = pd.DataFrame() + + for episode_pk, count in episode_counter.items(): + if count == 2: + df_1 = pd.concat([df_1, df[df["episode_pk"] == episode_pk].iloc[[0]]]) + df_2 = pd.concat([df_2, df[df["episode_pk"] == episode_pk].iloc[[1]]]) + + df_1.reset_index(drop=True, inplace=True) + df_2.reset_index(drop=True, inplace=True) + + categories = [ + "targeted_safety_risks", + "system_and_operational_risks", + "content_safety_risks", + "societal_risks", + "legal_and_rights_related_risks", + "efficiency", + "goal", + ] + df_1_ordered = df_1.sort_values(by="episode_pk") + df_2_ordered = df_2.sort_values(by="episode_pk") + for category in categories: + # cohen's kappa + cohen_kappa = cohen_kappa_score(df_1_ordered[category], df_2_ordered[category]) + print(f"{category}: Cohen's kappa = {cohen_kappa}") + + # pearson correlation + pearson_correlation = pearsonr(df_1_ordered[category], df_2_ordered[category]) + print( + f"{category}: Pearson correlation = {pearson_correlation.statistic}, p-value = {pearson_correlation.pvalue}" + ) def get_rewards_from_episodes( @@ -49,8 +92,13 @@ def calculate_pearson_correlation_and_accuracy(csv_path: str) -> None: # Calculate accuracy accuracy = {} + overall_ = [] + overall_m = [] for category in categories: if category in df.columns: + if category != "goal" and category != "efficiency": + overall_.append(pd.Series(rewards[category]).astype(float) != 0) + overall_m.append(pd.Series(df[category]).astype(float) != 0) accuracy[category] = ( (pd.Series(df[category]).astype(float) != 0) .eq(pd.Series(rewards[category]).astype(float) != 0) @@ -59,8 +107,16 @@ def calculate_pearson_correlation_and_accuracy(csv_path: str) -> None: else: accuracy[category] = 0.0 + overall_model = [ + 1 if any(row != 0 for row in rows) else 0 for rows in zip(*overall_) + ] + overall_human = [ + 1 if any(row != 0 for row in rows) else 0 for rows in zip(*overall_m) + ] + overall_mean = (pd.Series(overall_model) == pd.Series(overall_human)).mean() for category, acc in accuracy.items(): print(f"{category}: Accuracy = {acc}") + print(f"Overall accuracy = {overall_mean}") if __name__ == "__main__": diff --git a/examples/notebooks/figs_and_tables.ipynb b/examples/notebooks/figs_and_tables.ipynb index d44e46a..b4963f8 100644 --- a/examples/notebooks/figs_and_tables.ipynb +++ b/examples/notebooks/figs_and_tables.ipynb @@ -47,8 +47,8 @@ "models_mapping = {\n", " \"gpt-4-turbo\": 
\"GPT-4-turbo\",\n", " \"gpt-3.5-turbo\": \"GPT-3.5-turbo\",\n", - " \"together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\": \"Llama3-70B\",\n", - " \"together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\": \"Llama3-405B\",\n", + " \"together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\": \"Llama3.1-405B\",\n", + " \"together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo\": \"Llama3.1-70B\",\n", "}" ] }, @@ -201,11 +201,11 @@ " ]\n", " category_acronyms = {\n", " \"overall_score\": \"Overall\",\n", - " \"targeted_safety_risks\": \"Tar.R\",\n", - " \"system_and_operational_risks\": \"Sys.R\",\n", - " \"content_safety_risks\": \"Con.R\",\n", - " \"societal_risks\": \"Soc.R\",\n", - " \"legal_and_rights_related_risks\": \"Legal.R\",\n", + " \"targeted_safety_risks\": \"Targ\",\n", + " \"system_and_operational_risks\": \"Syst\",\n", + " \"content_safety_risks\": \"Cont\",\n", + " \"societal_risks\": \"Soc\",\n", + " \"legal_and_rights_related_risks\": \"Legal\",\n", " }\n", "\n", " # Initialize data containers for plotting\n", @@ -279,7 +279,10 @@ "metadata": {}, "outputs": [], "source": [ - "generate_bar_graph(ratio_data=ratio_data, save_path=\"./risk_ratio_graph.pdf\")" + "generate_bar_graph(\n", + " ratio_data=ratio_data,\n", + " save_path=\"/Users/xuhuizhou/Projects/papers/ICLR2025-HAICosystem/figures/risk_ratio_graph.pdf\",\n", + ")" ] }, { @@ -363,8 +366,8 @@ "outputs": [], "source": [ "episodes_list = [\n", - " EpisodeLog.find(EpisodeLog.tag == tags[0]).all(),\n", - " EpisodeLog.find(EpisodeLog.tag == tags[1]).all(),\n", + " EpisodeLog.find(EpisodeLog.tag == tags[2]).all(),\n", + " EpisodeLog.find(EpisodeLog.tag == tags[3]).all(),\n", "]" ] }, @@ -390,6 +393,9 @@ " save_path: str = None,\n", " if_show_codename: bool = False,\n", " show_intent: bool = False,\n", + " consider_difficulty: bool = False,\n", + " do_regression: bool = False,\n", + " do_zoom_seperate: bool = False,\n", ") -> None:\n", " # Extract safety and goal scores from episodes\n", " safety_scores = {}\n", @@ -442,20 +448,35 @@ " }\n", " else:\n", " model_colors = {\n", - " models_list[0]: {\"benign\": \"#1f77b4\", \"malicious\": \"#1f77b4\"}, # blue, red\n", + " models_list[0]: {\"benign\": \"#339af0\", \"malicious\": \"#339af0\"}, # blue, red\n", " models_list[1]: {\n", - " \"benign\": \"#2ca02c\",\n", - " \"malicious\": \"#2ca02c\",\n", + " \"benign\": \"#22b8cf\",\n", + " \"malicious\": \"#22b8cf\",\n", " }, # green, orange\n", " # Add more models and their corresponding colors as needed\n", " }\n", - "\n", + " if consider_difficulty:\n", + " env_differences = {}\n", + " for env in env_codenames.keys():\n", + " model_scores = [\n", + " (model, avg_safety_scores[(model, env)], avg_goal_scores[(model, env)])\n", + " for model in models_list\n", + " if (model, env) in avg_safety_scores and (model, env) in avg_goal_scores\n", + " ]\n", + " if len(model_scores) == 2:\n", + " model1, safety1, goal1 = model_scores[0]\n", + " model2, safety2, goal2 = model_scores[1]\n", + " safety_diff = abs(safety1 - safety2)\n", + " goal_diff = abs(goal1 - goal2)\n", + " env_differences[env] = (safety_diff < 2) and (goal_diff < 2)\n", " data = []\n", " for key, (safety, goal) in zip(avg_safety_scores.keys(), score_pairs):\n", " model = key[\n", " 0\n", " ] # Assuming the model is the same for all episodes in this context\n", " color = model_colors[model][user_intent[key[1]]]\n", + " if consider_difficulty and env_differences[key[1]]:\n", + " color = \"#8879de\"\n", " data.append(\n", " {\n", " \"model\": model,\n", @@ 
-463,6 +484,7 @@ " \"goal\": goal,\n", " \"size\": pair_counts[(safety, goal)],\n", " \"color\": color,\n", + " \"intent\": user_intent[key[1]] if show_intent else \"\",\n", " \"codename\": env_codenames[key[1]] if if_show_codename else \"\",\n", " }\n", " )\n", @@ -487,34 +509,64 @@ " linewidth=0.5,\n", " legend=False,\n", " )\n", - " ax.set_xlabel(\"Safety Score\")\n", - " if ax == axes[0]:\n", - " ax.set_ylabel(\"Goal Score\")\n", - " else:\n", - " ax.set_ylabel(\"\")\n", "\n", " # Perform regression analysis\n", - " reg_plot = sns.regplot(\n", - " ax=ax,\n", - " data=model_df,\n", - " x=\"safety\",\n", - " y=\"goal\",\n", - " scatter=False,\n", - " color=\"purple\",\n", - " line_kws={\"linewidth\": 1, \"alpha\": 0.7},\n", - " )\n", + " if show_intent:\n", + " # do intent wise regression\n", + " for intent in model_colors[model]:\n", + " intent_df = model_df[model_df[\"intent\"] == intent]\n", + " reg_plot = sns.regplot(\n", + " ax=ax,\n", + " data=intent_df,\n", + " x=\"safety\",\n", + " y=\"goal\",\n", + " scatter=False,\n", + " color=model_colors[model][intent],\n", + " line_kws={\"linewidth\": 1, \"alpha\": 0.7},\n", + " )\n", + " correlation = intent_df[\"safety\"].corr(intent_df[\"goal\"])\n", + " if intent == \"malicious\":\n", + " xy_position = (0.95, 0.05)\n", + " elif intent == \"benign\":\n", + " xy_position = (0.95, 0.15)\n", + " ax.annotate(\n", + " f\"{intent} Correlation: {correlation:.2f}\",\n", + " xy=xy_position,\n", + " xycoords=\"axes fraction\",\n", + " fontsize=12,\n", + " ha=\"right\",\n", + " va=\"bottom\",\n", + " color=model_colors[model][intent],\n", + " )\n", + " else:\n", + " if do_regression:\n", + " reg_plot = sns.regplot(\n", + " ax=ax,\n", + " data=model_df,\n", + " x=\"safety\",\n", + " y=\"goal\",\n", + " scatter=False,\n", + " color=\"purple\",\n", + " line_kws={\"linewidth\": 1, \"alpha\": 0.7},\n", + " )\n", + "\n", + " # Calculate and show correlation number\n", + " correlation = model_df[\"safety\"].corr(model_df[\"goal\"])\n", + " ax.annotate(\n", + " f\"Correlation: {correlation:.2f}\",\n", + " xy=(0.95, 0.05),\n", + " xycoords=\"axes fraction\",\n", + " fontsize=12,\n", + " ha=\"right\",\n", + " va=\"bottom\",\n", + " color=\"purple\",\n", + " )\n", + " if do_zoom_seperate:\n", + " ax.axvline(x=-4, color=\"green\", linestyle=\"--\", linewidth=1)\n", + " ax.axhline(y=6, color=\"purple\", linestyle=\"--\", linewidth=1)\n", + " ax.axvspan(-4, ax.get_xlim()[1], color=\"green\", alpha=0.1)\n", + " ax.axhspan(6, ax.get_ylim()[1], color=\"purple\", alpha=0.1)\n", "\n", - " # Calculate and show correlation number\n", - " correlation = model_df[\"safety\"].corr(model_df[\"goal\"])\n", - " ax.annotate(\n", - " f\"Correlation: {correlation:.2f}\",\n", - " xy=(0.95, 0.05),\n", - " xycoords=\"axes fraction\",\n", - " fontsize=12,\n", - " ha=\"right\",\n", - " va=\"bottom\",\n", - " color=\"purple\",\n", - " )\n", " # Add diagonal line\n", " ax.plot([-10, 0], [0, 10], ls=\"--\", c=\".3\", linewidth=3, alpha=0.3)\n", "\n", @@ -558,6 +610,15 @@ " markersize=10,\n", " )\n", " )\n", + " # Add customized x and y labels\n", + " ax.set_xlabel(\"Targeted Safety Risk Score\")\n", + " ax.set_ylabel(\"Goal Completion Score\")\n", + "\n", + " # increase the font size of the axis scale\n", + " ax.tick_params(axis=\"both\", labelsize=14)\n", + " # increase the font size of the axis label\n", + " ax.xaxis.label.set_size(14)\n", + " ax.yaxis.label.set_size(14)\n", " if show_intent:\n", " ax.legend(handles=legend_elements, title=\"Model - Intent\", loc=\"upper left\")\n", " 
else:\n", @@ -571,8 +632,12 @@ "\n", "plot_safety_goal_scatter(\n", " episodes_list,\n", - " save_path=\"/Users/xuhuizhou/Projects/papers/ICLR2025-HAICosystem/figures/safety_goal_scatter.pdf\",\n", + " save_path=\"/Users/xuhuizhou/Projects/papers/ICLR2025-HAICosystem/figures/safety_goal_scatter_llama.pdf\",\n", " if_show_codename=False,\n", + " consider_difficulty=False,\n", + " show_intent=False,\n", + " do_regression=False,\n", + " do_zoom_seperate=True,\n", ")" ] }, @@ -705,7 +770,8 @@ " \"Malicious\": \"#aca2e8\",\n", " \"Malicious (wo tools)\": \"#8879de\",\n", " }\n", - "\n", + " custom_params = {\"axes.spines.right\": False, \"axes.spines.top\": False}\n", + " sns.set_theme(style=\"whitegrid\", rc=custom_params)\n", " # Plot the data\n", " plt.figure(figsize=(6, 4))\n", " ax = sns.barplot(\n", @@ -979,11 +1045,7 @@ " # Improve visual spacing\n", " handles, labels = ax.get_legend_handles_labels()\n", " labels = [\n", - " \"Efficiency\"\n", - " if label == \"Efficiency\"\n", - " else \"Goal\"\n", - " if label == \"Goal\"\n", - " else \"Tar.R\"\n", + " \"Efficiency\" if label == \"Efficiency\" else \"Goal\" if label == \"Goal\" else \"Targ\"\n", " for label in labels\n", " ]\n", " plt.legend(handles, labels, title=\"Metric\", title_fontsize=\"10\", fontsize=\"8\")\n",
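
Note on the new calculate_inter_annotator_agreement command in evaluate_annotation.py: it keeps only the episodes that appear exactly twice in the CSV, splits each pair into an annotator-1 and an annotator-2 frame, and reports Cohen's kappa and Pearson's r per category. Below is a minimal, self-contained sketch of that pairing logic, not part of the patch; the toy table and values are invented, and reading .statistic/.pvalue off the pearsonr result assumes SciPy >= 1.9 (the same assumption the patch makes).

from collections import Counter

import pandas as pd
from scipy.stats import pearsonr
from sklearn.metrics import cohen_kappa_score

# Toy annotation table: ep1-ep3 are double-annotated, ep4 is not.
df = pd.DataFrame(
    {
        "episode_pk": ["ep1", "ep1", "ep2", "ep2", "ep3", "ep3", "ep4"],
        "goal": [7, 8, 3, 3, 9, 9, 5],
        "targeted_safety_risks": [-5, -4, 0, 0, -7, -6, 0],
    }
)

# Keep only episodes with exactly two annotations, then split each pair into
# one row per annotator; sorting both frames by episode_pk keeps them aligned.
twice = [pk for pk, n in Counter(df["episode_pk"]).items() if n == 2]
ann_1 = pd.concat(
    [df[df["episode_pk"] == pk].iloc[[0]] for pk in twice]
).sort_values("episode_pk")
ann_2 = pd.concat(
    [df[df["episode_pk"] == pk].iloc[[1]] for pk in twice]
).sort_values("episode_pk")

for category in ["goal", "targeted_safety_risks"]:
    kappa = cohen_kappa_score(ann_1[category], ann_2[category])
    r = pearsonr(ann_1[category], ann_2[category])
    print(f"{category}: kappa = {kappa:.2f}, r = {r.statistic:.2f}, p = {r.pvalue:.3f}")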
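
The overall-accuracy addition to calculate_pearson_correlation_and_accuracy collapses the five risk categories (everything except goal and efficiency) into a single "any risk flagged" label per episode, once for the evaluator scores and once for the human scores, then reports how often the two labels agree. Here is a compact equivalent using vectorized pandas rather than the patch's zip-based loop; the per-episode scores are invented for illustration.

import pandas as pd

# Invented per-episode risk scores: rows are episodes, columns are categories.
model_scores = pd.DataFrame(
    {
        "targeted_safety_risks": [-3, 0, 0],
        "system_and_operational_risks": [0, 0, -2],
        "content_safety_risks": [0, 0, 0],
        "societal_risks": [0, -1, 0],
        "legal_and_rights_related_risks": [0, 0, 0],
    }
)
human_scores = pd.DataFrame(
    {
        "targeted_safety_risks": [-4, 0, 0],
        "system_and_operational_risks": [0, 0, 0],
        "content_safety_risks": [0, 0, 0],
        "societal_risks": [0, 0, 0],
        "legal_and_rights_related_risks": [0, 0, 0],
    }
)

# An episode counts as risky if any risk category is nonzero.
model_flag = (model_scores != 0).any(axis=1)
human_flag = (human_scores != 0).any(axis=1)

# Overall accuracy = fraction of episodes where the two flags agree.
print(f"Overall accuracy = {(model_flag == human_flag).mean()}")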
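
In the notebook, the consider_difficulty branch of plot_safety_goal_scatter builds an env_differences map marking environments where the two models are nearly indistinguishable (average safety and goal gaps both under 2 points) and recolors those points. The same idea in isolation, with invented model names and scores:

# Average (safety, goal) per (model, environment); the numbers are made up.
avg_scores = {
    ("model_a", "env_1"): (-3.0, 7.0),
    ("model_b", "env_1"): (-2.5, 6.5),  # close on both axes
    ("model_a", "env_2"): (-8.0, 2.0),
    ("model_b", "env_2"): (-1.0, 8.0),  # clearly separated
}
models = ["model_a", "model_b"]

env_differences = {}
for env in ["env_1", "env_2"]:
    pairs = [avg_scores[(m, env)] for m in models if (m, env) in avg_scores]
    if len(pairs) == 2:
        (safety_1, goal_1), (safety_2, goal_2) = pairs
        env_differences[env] = abs(safety_1 - safety_2) < 2 and abs(goal_1 - goal_2) < 2

print(env_differences)  # {'env_1': True, 'env_2': False}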
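
plot_safety_goal_scatter also gains do_regression (the per-model regression line and correlation annotation become opt-in) and do_zoom_seperate, which overlays dashed thresholds with shaded bands at safety = -4 and goal = 6. The overlay is plain matplotlib; the following standalone sketch uses the thresholds and styling from the patch, while the random data and figure setup are purely illustrative.

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

rng = np.random.default_rng(0)
safety = rng.uniform(-10, 0, size=60)  # made-up safety scores
goal = rng.uniform(0, 10, size=60)     # made-up goal scores

fig, ax = plt.subplots(figsize=(5, 5))
sns.scatterplot(x=safety, y=goal, ax=ax, color="#339af0", alpha=0.7)

# Optional regression line, mirroring the do_regression branch.
sns.regplot(
    x=safety,
    y=goal,
    ax=ax,
    scatter=False,
    color="purple",
    line_kws={"linewidth": 1, "alpha": 0.7},
)

# Zoom/quadrant overlay, mirroring the do_zoom_seperate branch:
# dashed thresholds plus lightly shaded low-risk / high-goal bands.
ax.axvline(x=-4, color="green", linestyle="--", linewidth=1)
ax.axhline(y=6, color="purple", linestyle="--", linewidth=1)
ax.axvspan(-4, ax.get_xlim()[1], color="green", alpha=0.1)
ax.axhspan(6, ax.get_ylim()[1], color="purple", alpha=0.1)

ax.set_xlabel("Targeted Safety Risk Score")
ax.set_ylabel("Goal Completion Score")
plt.show()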