From 7e76059b81cd9b94d80b44ba62f96b9b737205c6 Mon Sep 17 00:00:00 2001 From: Morgan McGuire Date: Tue, 29 Aug 2023 19:38:03 +0200 Subject: [PATCH] update header --- ..._tune_OpenAI_with_Weights_and_Biases.ipynb | 546 +++++++++--------- 1 file changed, 273 insertions(+), 273 deletions(-) diff --git a/colabs/openai/Fine_tune_OpenAI_with_Weights_and_Biases.ipynb b/colabs/openai/Fine_tune_OpenAI_with_Weights_and_Biases.ipynb index eb3a13a0..bac92a5a 100644 --- a/colabs/openai/Fine_tune_OpenAI_with_Weights_and_Biases.ipynb +++ b/colabs/openai/Fine_tune_OpenAI_with_Weights_and_Biases.ipynb @@ -6,8 +6,8 @@ "id": "8j0LoFidslVv" }, "source": [ - "\"Open\n", - "" + "\"Open\n", + "" ] }, { @@ -18,7 +18,7 @@ "source": [ "\"Weights\n", "\n", - "\n", + "\n", "\n", "# Fine-tune ChatGPT-3.5 and GPT-4 with Weights & Biases" ] @@ -58,16 +58,16 @@ }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CthP_OpIQXqy" + }, + "outputs": [], "source": [ "# Remove once this PR is merged: https://github.com/openai/openai-python/pull/590 and openai release is made\n", "!pip uninstall -y openai -qq \\\n", "&& pip install git+https://github.com/morganmcg1/openai-python.git@update_wandb_logger -qqq" - ], - "metadata": { - "id": "CthP_OpIQXqy" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -116,23 +116,23 @@ }, { "cell_type": "markdown", - "source": [ - "Start your Weigths & Biases run. If you don't have an account you can sign up for one for free at www.wandb.ai" - ], "metadata": { "id": "vUNkCX_C3S6L" - } + }, + "source": [ + "Start your Weigths & Biases run. If you don't have an account you can sign up for one for free at www.wandb.ai" + ] }, { "cell_type": "code", - "source": [ - "WANDB_PROJECT = \"OpenAI-Fine-Tune\"" - ], + "execution_count": 3, "metadata": { "id": "k_2ujnoU12Kg" }, - "execution_count": 3, - "outputs": [] + "outputs": [], + "source": [ + "WANDB_PROJECT = \"OpenAI-Fine-Tune\"" + ] }, { "cell_type": "markdown", @@ -172,6 +172,11 @@ }, { "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "P1e-TIKGxuYB" + }, + "outputs": [], "source": [ "from datasets import load_dataset\n", "\n", @@ -189,12 +194,7 @@ "\n", "for idx, d in enumerate(data):\n", " d[\"new_index\"] = idx" - ], - "metadata": { - "id": "P1e-TIKGxuYB" - }, - "execution_count": 8, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -207,59 +207,64 @@ }, { "cell_type": "code", - "source": [ - "len(data), data[0:2]" - ], + "execution_count": null, "metadata": { "id": "GwRu-1afyqAp" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "len(data), data[0:2]" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "qV-1T0gStr8w" + }, "source": [ "### Format our Data for Chat Completion Mdoels\n", "We modify the `base_prompt` from the LegalBench task to make it a zero-shot prompt, as we are training the model instead of using few-shot prompting" - ], - "metadata": { - "id": "qV-1T0gStr8w" - } + ] }, { "cell_type": "code", - "source": [ - "base_prompt_zero_shot = \"Identify if the clause provides that all Confidential Information shall be expressly identified by the Disclosing Party. Answer with only `Yes` or `No`\"" - ], + "execution_count": 12, "metadata": { "id": "fcY1ia-WzKbu" }, - "execution_count": 12, - "outputs": [] + "outputs": [], + "source": [ + "base_prompt_zero_shot = \"Identify if the clause provides that all Confidential Information shall be expressly identified by the Disclosing Party. Answer with only `Yes` or `No`\"" + ] }, { "cell_type": "markdown", - "source": [ - "We now split it into training/validation dataset, lets train on 30 samples and test on the remainder\n" - ], "metadata": { "id": "ruAjOEov3kQ1" - } + }, + "source": [ + "We now split it into training/validation dataset, lets train on 30 samples and test on the remainder\n" + ] }, { "cell_type": "code", - "source": [ - "n_train = 30\n", - "n_test = len(data) - n_train" - ], + "execution_count": 10, "metadata": { "id": "hWVZ8YqqPsaw" }, - "execution_count": 10, - "outputs": [] + "outputs": [], + "source": [ + "n_train = 30\n", + "n_test = len(data) - n_train" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iM3QPW3Oy5aT" + }, + "outputs": [], "source": [ "train_messages = []\n", "test_messages = []\n", @@ -276,26 +281,26 @@ " test_messages.append({'messages': prompts})\n", "\n", "len(train_messages), len(test_messages), n_test, train_messages[5]" - ], - "metadata": { - "id": "iM3QPW3Oy5aT" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "ZSFUG80y1vSL" + }, "source": [ "### Save the data to Weigths & Biases\n", "\n", "Save the data in a train and test file first" - ], - "metadata": { - "id": "ZSFUG80y1vSL" - } + ] }, { "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "XvX0b0xJ1Qu3" + }, + "outputs": [], "source": [ "train_file_path = 'encoded_train_data.jsonl'\n", "with open(train_file_path, 'w') as file:\n", @@ -308,24 +313,24 @@ " for item in test_messages:\n", " line = json.dumps(item)\n", " file.write(line + '\\n')" - ], - "metadata": { - "id": "XvX0b0xJ1Qu3" - }, - "execution_count": 16, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "Next, we validate that our training data is in the correct format using a script from the [OpenAI fine-tuning documentation](https://platform.openai.com/docs/guides/fine-tuning/)" - ], "metadata": { "id": "fXLi2eNS4MvR" - } + }, + "source": [ + "Next, we validate that our training data is in the correct format using a script from the [OpenAI fine-tuning documentation](https://platform.openai.com/docs/guides/fine-tuning/)" + ] }, { "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "QB-cvVUJtT8m" + }, + "outputs": [], "source": [ "# Next, we specify the data path and open the JSONL file\n", "\n", @@ -462,44 +467,44 @@ " print(f\"By default, you'll train for {n_epochs} epochs on this dataset\")\n", " print(f\"By default, you'll be charged for ~{n_epochs * n_billing_tokens_in_dataset} tokens\")\n", " print(\"See pricing page to estimate total costs\")" - ], - "metadata": { - "id": "QB-cvVUJtT8m" - }, - "execution_count": 17, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "Validate train data" - ], "metadata": { "id": "rg4pTjGH4kzY" - } + }, + "source": [ + "Validate train data" + ] }, { "cell_type": "code", - "source": [ - "openai_validate_data(train_file_path)" - ], + "execution_count": null, "metadata": { "id": "B7O2yyQ14fO5" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "openai_validate_data(train_file_path)" + ] }, { "cell_type": "markdown", - "source": [ - "Log our data to Weigths & Biases Artifacts for storage and versioning" - ], "metadata": { "id": "8BhnW1Z82t2m" - } + }, + "source": [ + "Log our data to Weigths & Biases Artifacts for storage and versioning" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Uu6wT4rG1dbv" + }, + "outputs": [], "source": [ "wandb.init(\n", " project=WANDB_PROJECT,\n", @@ -520,12 +525,7 @@ "entity = wandb.run.entity\n", "\n", "wandb.finish()" - ], - "metadata": { - "id": "Uu6wT4rG1dbv" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -590,15 +590,20 @@ }, { "cell_type": "markdown", - "source": [ - "Then we upload the training data to OpenAI" - ], "metadata": { "id": "cUi3ncKZ7Cy4" - } + }, + "source": [ + "Then we upload the training data to OpenAI" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "e1XHzl_O7DS1" + }, + "outputs": [], "source": [ "openai.api_key = openai_key\n", "\n", @@ -608,12 +613,7 @@ ")\n", "\n", "openai_train_file_info" - ], - "metadata": { - "id": "e1XHzl_O7DS1" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -626,6 +626,11 @@ }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ASuWv7j969rP" + }, + "outputs": [], "source": [ "openai_ft_job_info = openai.FineTuningJob.create(\n", " training_file=openai_train_file_info[\"id\"],\n", @@ -636,53 +641,48 @@ "ft_job_id = openai_ft_job_info[\"id\"]\n", "\n", "openai_ft_job_info" - ], - "metadata": { - "id": "ASuWv7j969rP" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "Thats it! Now your model is training on OpenAI's machines. To get the current state of your fine-tuning job, run:" - ], "metadata": { "id": "TD6nkT9i9eUu" - } + }, + "source": [ + "Thats it! Now your model is training on OpenAI's machines. To get the current state of your fine-tuning job, run:" + ] }, { "cell_type": "code", - "source": [ - "state = openai.FineTuningJob.retrieve(ft_job_id)\n", - "state[\"status\"], state[\"trained_tokens\"], state[\"finished_at\"], state[\"fine_tuned_model\"]" - ], + "execution_count": null, "metadata": { "id": "7LpekSCb9ewI" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "state = openai.FineTuningJob.retrieve(ft_job_id)\n", + "state[\"status\"], state[\"trained_tokens\"], state[\"finished_at\"], state[\"fine_tuned_model\"]" + ] }, { "cell_type": "markdown", - "source": [ - "Show recent events for our fine-tuning job" - ], "metadata": { "id": "2Fzdq-548yEO" - } + }, + "source": [ + "Show recent events for our fine-tuning job" + ] }, { "cell_type": "code", - "source": [ - "openai.FineTuningJob.list_events(id=ft_job_id, limit=5)" - ], + "execution_count": null, "metadata": { "id": "ybOfKsGZ80Z8" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "openai.FineTuningJob.list_events(id=ft_job_id, limit=5)" + ] }, { "cell_type": "markdown", @@ -724,6 +724,9 @@ }, { "cell_type": "markdown", + "metadata": { + "id": "_8oRc-0oV5zj" + }, "source": [ "Calling `openai wandb sync` will log all un-synced fine-tuned jobs to W&B\n", "\n", @@ -731,10 +734,7 @@ "- our openai key as an environment variable\n", "- the id of the fine-tune job we'd like to log\n", "- the W&B project of where to log it to" - ], - "metadata": { - "id": "_8oRc-0oV5zj" - } + ] }, { "cell_type": "code", @@ -749,14 +749,14 @@ }, { "cell_type": "code", - "source": [ - "wandb.finish()" - ], + "execution_count": null, "metadata": { "id": "c0VvaDf8A2Gm" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "wandb.finish()" + ] }, { "cell_type": "markdown", @@ -793,23 +793,7 @@ }, { "cell_type": "code", - "source": [ - "# create eval job\n", - "wandb.init(project=WANDB_PROJECT,\n", - " # entity=\"prompt-eng\",\n", - " job_type='eval')\n", - "\n", - "artifact_valid = wandb.use_artifact(\n", - " f'{entity}/{WANDB_PROJECT}/legalbench-contract_nli_explicit_identification-test:latest',\n", - " type='test-data')\n", - "test_file = artifact_valid.get_path(test_file_path).download(\"my_data\")\n", - "\n", - "with open(test_file) as f:\n", - " test_dataset = [json.loads(line) for line in f]\n", - "\n", - "print(f\"There are {len(test_dataset)} test examples\")\n", - "wandb.config.update({\"num_test_samples\":len(test_dataset)})" - ], + "execution_count": 29, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -818,182 +802,203 @@ "id": "HmnEZ8nFCe3_", "outputId": "83db236e-ae86-46d8-8d08-db0f4537cffb" }, - "execution_count": 29, "outputs": [ { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/html": [ "Finishing last run (ID:nyphppt3) before initializing another..." + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/html": [ "Waiting for W&B process to finish... (success)." + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/html": [ " View run exalted-armadillo-12 at: https://wandb.ai/prompt-eng/OpenAI-Fine-Tune/runs/nyphppt3
View job at https://wandb.ai/prompt-eng/OpenAI-Fine-Tune/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzNjcyODA1/version_details/v3
Synced 5 W&B file(s), 0 media file(s), 8 artifact file(s) and 1 other file(s)" + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/html": [ "Find logs at: ./wandb/run-20230829_171049-nyphppt3/logs" + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/html": [ "Successfully finished last run (ID:nyphppt3). Initializing new run:
" + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/html": [ "Tracking run with wandb version 0.15.9" + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/html": [ "Run data is saved locally in /content/wandb/run-20230829_171236-2gw56vqy" + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/html": [ "Syncing run atomic-pond-13 to Weights & Biases (docs)
" + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/html": [ " View project at https://wandb.ai/prompt-eng/OpenAI-Fine-Tune" + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/html": [ " View run at https://wandb.ai/prompt-eng/OpenAI-Fine-Tune/runs/2gw56vqy" + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "There are 87 test examples\n" ] } + ], + "source": [ + "# create eval job\n", + "wandb.init(project=WANDB_PROJECT,\n", + " # entity=\"prompt-eng\",\n", + " job_type='eval')\n", + "\n", + "artifact_valid = wandb.use_artifact(\n", + " f'{entity}/{WANDB_PROJECT}/legalbench-contract_nli_explicit_identification-test:latest',\n", + " type='test-data')\n", + "test_file = artifact_valid.get_path(test_file_path).download(\"my_data\")\n", + "\n", + "with open(test_file) as f:\n", + " test_dataset = [json.loads(line) for line in f]\n", + "\n", + "print(f\"There are {len(test_dataset)} test examples\")\n", + "wandb.config.update({\"num_test_samples\":len(test_dataset)})" ] }, { "cell_type": "markdown", + "metadata": { + "id": "Ku5Q8ondJkl0" + }, "source": [ "### Run evaluation on the Fine-Tuned Model\n", "Set up OpenAI call with retries" - ], - "metadata": { - "id": "Ku5Q8ondJkl0" - } + ] }, { "cell_type": "code", + "execution_count": 30, + "metadata": { + "id": "RqUnAMfgLafx" + }, + "outputs": [], "source": [ "@retry(stop=stop_after_attempt(3), wait=wait_fixed(60))\n", "def call_openai(messages=\"\", model=\"gpt-3.5-turbo\"):\n", " return openai.ChatCompletion.create(model=model, messages=messages, max_tokens=10)" - ], - "metadata": { - "id": "RqUnAMfgLafx" - }, - "execution_count": 30, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "Run evaluation and log results to W&B" - ], "metadata": { "id": "RtV9fawQMxBA" - } + }, + "source": [ + "Run evaluation and log results to W&B" + ] }, { "cell_type": "code", - "source": [ - "ft_model_id = \"ft:gpt-3.5-turbo-0613:weights-biases::7svDZ9hV\"" - ], + "execution_count": 31, "metadata": { "id": "mvNlrQIJSKnh" }, - "execution_count": 31, - "outputs": [] + "outputs": [], + "source": [ + "ft_model_id = \"ft:gpt-3.5-turbo-0613:weights-biases::7svDZ9hV\"" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "65RoSdftEh_6" + }, + "outputs": [], "source": [ "prediction_table = wandb.Table(columns=['messages', 'completion', 'target'])\n", "\n", @@ -1011,25 +1016,25 @@ " prediction_table.add_data(messages, completion, target)\n", "\n", "wandb.log({'predictions': prediction_table})" - ], - "metadata": { - "id": "65RoSdftEh_6" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "hDX8PW92SiWL" + }, "source": [ "Calculate the accuracy of the fine-tuned model and log to W&B\n", "\n" - ], - "metadata": { - "id": "hDX8PW92SiWL" - } + ] }, { "cell_type": "code", + "execution_count": 33, + "metadata": { + "id": "DQYv6Lx0GV_Z" + }, + "outputs": [], "source": [ "correct = 0\n", "for e in eval_data:\n", @@ -1041,25 +1046,25 @@ "print(f\"Accuracy is {accuracy}\")\n", "wandb.log({\"eval/accuracy\": accuracy})\n", "wandb.summary[\"eval/accuracy\"] = accuracy" - ], - "metadata": { - "id": "DQYv6Lx0GV_Z" - }, - "execution_count": 33, - "outputs": [] + ] }, { "cell_type": "markdown", + "metadata": { + "id": "fbkE_0KUL6ol" + }, "source": [ "### Run evaluation on a Baseline model for comparison\n", "Lets compare our model to the baseline model, `gpt-3.5-turbo`" - ], - "metadata": { - "id": "fbkE_0KUL6ol" - } + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0eqdoW3SHPmJ" + }, + "outputs": [], "source": [ "baseline_prediction_table = wandb.Table(columns=['messages', 'completion', 'target'])\n", "baseline_eval_data = []\n", @@ -1075,35 +1080,20 @@ " baseline_prediction_table.add_data(messages, completion, target)\n", "\n", "wandb.log({'baseline_predictions': baseline_prediction_table})" - ], - "metadata": { - "id": "0eqdoW3SHPmJ" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", - "source": [ - "Calculate the accuracy of the fine-tuned model and log to W&B" - ], "metadata": { "id": "MHmE6s8PTFRH" - } + }, + "source": [ + "Calculate the accuracy of the fine-tuned model and log to W&B" + ] }, { "cell_type": "code", - "source": [ - "baseline_correct = 0\n", - "for e in baseline_eval_data:\n", - " if e[1].lower() == e[2][\"content\"].lower():\n", - " baseline_correct+=1\n", - "\n", - "baseline_accuracy = baseline_correct / len(baseline_eval_data)\n", - "print(f\"Baseline Accurcy is: {baseline_accuracy}\")\n", - "wandb.log({\"eval/baseline_accuracy\": baseline_accuracy})\n", - "wandb.summary[\"eval/baseline_accuracy\"] = baseline_accuracy" - ], + "execution_count": 37, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1111,15 +1101,25 @@ "id": "aa5s2WmDHTlU", "outputId": "52be9415-5d9f-42ff-ced0-2a997f2a9089" }, - "execution_count": 37, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Baseline Accurcy is: 0.7586206896551724\n" ] } + ], + "source": [ + "baseline_correct = 0\n", + "for e in baseline_eval_data:\n", + " if e[1].lower() == e[2][\"content\"].lower():\n", + " baseline_correct+=1\n", + "\n", + "baseline_accuracy = baseline_correct / len(baseline_eval_data)\n", + "print(f\"Baseline Accurcy is: {baseline_accuracy}\")\n", + "wandb.log({\"eval/baseline_accuracy\": baseline_accuracy})\n", + "wandb.summary[\"eval/baseline_accuracy\"] = baseline_accuracy" ] }, { @@ -1135,14 +1135,14 @@ }, { "cell_type": "markdown", + "metadata": { + "id": "-kpW_o6lTHfG" + }, "source": [ "And thats it! In this example we have prepared our data, logged it to Weights & Biases, fine-tuned an OpenAI model using that data, logged the results to Weights & Biases and then run evaluation on the fine-tuned model.\n", "\n", "From here you can start to train on larger or more complex tasks, or else explore other ways to modify ChatGPT-3.5 such as giving it a different tone and style or response.\n" - ], - "metadata": { - "id": "-kpW_o6lTHfG" - } + ] }, { "cell_type": "markdown", @@ -1170,4 +1170,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +}