From 037d7c6a75631ea1627d6f6f2def26d15d09f25d Mon Sep 17 00:00:00 2001
From: Soumik Rakshit <19soumik.rakshit96@gmail.com>
Date: Thu, 3 Aug 2023 11:25:04 +0000
Subject: [PATCH 1/3] add: sdxl-compel notebook
---
colabs/diffusers/sdxl-compel.ipynb | 283 +++++++++++++++++++++++++++++
1 file changed, 283 insertions(+)
create mode 100644 colabs/diffusers/sdxl-compel.ipynb
diff --git a/colabs/diffusers/sdxl-compel.ipynb b/colabs/diffusers/sdxl-compel.ipynb
new file mode 100644
index 00000000..a54b18ff
--- /dev/null
+++ b/colabs/diffusers/sdxl-compel.ipynb
@@ -0,0 +1,283 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Prompt Weighing and Blending using for SDXL 1.0 using [Compel](https://github.com/damian0815/compel) and [๐งจ Diffusers](https://huggingface.co/docs/diffusers)\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "This notebook demonstrates the following:\n",
+ "- Performing text-conditional image-generations using [๐งจ Diffusers](https://huggingface.co/docs/diffusers).\n",
+ "- Using the Stable Diffusion XL Refiner pipeline to further refine the outputs of the base model.\n",
+ "- Manage image generation experiments using [Weights & Biases](http://wandb.ai/geekyrakshit).\n",
+ "- Log the prompts and generated images to [Weigts & Biases](http://wandb.ai/geekyrakshit) for visalization."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Installing the Dependencies"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install -qq diffusers[\"torch\"] transformers compel wandb"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "import wandb\n",
+ "from diffusers import DiffusionPipeline, EulerDiscreteScheduler\n",
+ "from compel import Compel, ReturnedEmbeddingsType"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Experiment Management using Weights & Biases\n",
+ "\n",
+ "Managing our image generation experiments is crucial for the sake of reproducibility. Hence we sync all the configs of our experiments with our Weights & Biases run. This stores all the configs of the experiments, right from the prompts to the refinement technque and the configuration of the scheduler."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "wandb.init(project=\"stable-diffusion-xl\", entity=\"geekyrakshit\", job_type=\"text-to-image-compel\", save_code=True)\n",
+ "\n",
+ "config = wandb.config\n",
+ "config.stable_diffusion_checkpoint = \"stabilityai/stable-diffusion-xl-base-1.0\"\n",
+ "config.refiner_checkpoint = \"stabilityai/stable-diffusion-xl-refiner-1.0\"\n",
+ "config.offload_to_cpu = False\n",
+ "config.compile_model = False\n",
+ "# config.prompt = '(\"A highly detained photo\", \"a man with the head of an elephant\", \"a golden tiara on the head\", \"a snake wrapped around his belly\", \"The Himalayan mountains are the backdrop of the photo\", \"Realistic, cold and bright color grading, 8k.\").and(0.7, 1, 0.5, 0.9, 0.6, 0.9)'\n",
+ "config.prompt_1 = \"a cat playing with a ball in the (forest)---------\"\n",
+ "config.prompt_2 = \"Realistic, highly detailed, cold and bright color grading, 8k.\"\n",
+ "config.negative_prompt_1 = \"low-quality\"\n",
+ "config.negative_prompt_2 = \"low-quality\"\n",
+ "config.seed = 42\n",
+ "config.use_ensemble_of_experts = False\n",
+ "config.num_inference_steps = 100\n",
+ "config.num_refinement_steps = 150\n",
+ "config.high_noise_fraction = 0.8 # Set explicitly only if config.use_ensemble_of_experts is True\n",
+ "config.scheduler_kwargs = {\n",
+ " \"beta_end\": 0.012,\n",
+ " \"beta_schedule\": \"scaled_linear\", # one of [\"linear\", \"scaled_linear\"]\n",
+ " \"beta_start\": 0.00085,\n",
+ " \"interpolation_type\": \"linear\", # one of [\"linear\", \"log_linear\"]\n",
+ " \"num_train_timesteps\": 1000,\n",
+ " \"prediction_type\": \"epsilon\", # one of [\"epsilon\", \"sample\", \"v_prediction\"]\n",
+ " \"steps_offset\": 1,\n",
+ " \"timestep_spacing\": \"leading\", # one of [\"linspace\", \"leading\"]\n",
+ " \"trained_betas\": None,\n",
+ " \"use_karras_sigmas\": False,\n",
+ "}\n",
+ "config.prompt_credits = \"\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can make the experiment deterministic based on the seed specified in the experiment configs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if config.seed is not None:\n",
+ " generator = [torch.Generator(device=\"cuda\").manual_seed(config.seed)]\n",
+ "else:\n",
+ " generator = [torch.Generator(device=\"cuda\")]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Creating the Diffusion Pipelines\n",
+ "\n",
+ "For performing text-conditional image generation, we use the `diffusers` library to define the diffusion pipelines corresponding to the base SDXL model and the SDXL refinement model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pipe = DiffusionPipeline.from_pretrained(\n",
+ " config.stable_diffusion_checkpoint,\n",
+ " torch_dtype=torch.float16,\n",
+ " variant=\"fp16\",\n",
+ " use_safetensors=True,\n",
+ " scheduler=EulerDiscreteScheduler(**config.scheduler_kwargs),\n",
+ ")\n",
+ "\n",
+ "if config.offload_to_cpu:\n",
+ " pipe.enable_model_cpu_offload()\n",
+ "else:\n",
+ " pipe.to(\"cuda\")\n",
+ "\n",
+ "if config.compile_model:\n",
+ " pipe.unet = torch.compile(pipe.unet, mode=\"reduce-overhead\", fullgraph=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if config.prompt_2 == \"\" and config.negative_prompt_2 == \"\":\n",
+ " base_compel = Compel(\n",
+ " tokenizer=[pipe.tokenizer, pipe.tokenizer_2],\n",
+ " text_encoder=[pipe.text_encoder, pipe.text_encoder_2],\n",
+ " returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,\n",
+ " requires_pooled=[False, True]\n",
+ " )\n",
+ "\n",
+ " base_positive_prompt_embeds, base_positive_prompt_pooled = base_compel(config.prompt)\n",
+ " base_negative_prompt_embeds, base_negative_prompt_pooled = base_compel(config.negative_prompt)\n",
+ " base_positive_prompt_embeds, base_negative_prompt_embeds = base_compel.pad_conditioning_tensors_to_same_length([\n",
+ " base_positive_prompt_embeds, base_negative_prompt_embeds\n",
+ " ])\n",
+ "else:\n",
+ " base_compel_1 = Compel(\n",
+ " tokenizer=pipe.tokenizer,\n",
+ " text_encoder=pipe.text_encoder,\n",
+ " returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,\n",
+ " requires_pooled=False,\n",
+ " )\n",
+ "\n",
+ " base_positive_prompt_embeds_1 = base_compel_1(config.prompt_1)\n",
+ " base_negative_prompt_embeds_1 = base_compel_1(config.negative_prompt_1)\n",
+ " \n",
+ " base_compel_2 = Compel(\n",
+ " tokenizer=pipe.tokenizer_2,\n",
+ " text_encoder=pipe.text_encoder_2,\n",
+ " returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,\n",
+ " requires_pooled=True,\n",
+ " )\n",
+ "\n",
+ " base_positive_prompt_embeds_2, base_positive_prompt_pooled = base_compel_2(config.prompt_2)\n",
+ " base_negative_prompt_embeds_2, base_negative_prompt_pooled = base_compel_2(config.negative_prompt_2)\n",
+ " \n",
+ " (\n",
+ " base_positive_prompt_embeds_2, base_negative_prompt_embeds_2\n",
+ " ) = base_compel_2.pad_conditioning_tensors_to_same_length([\n",
+ " base_positive_prompt_embeds_2, base_negative_prompt_embeds_2\n",
+ " ])\n",
+ " \n",
+ " base_positive_prompt_embeds = torch.cat((base_positive_prompt_embeds_1, base_positive_prompt_embeds_2), dim=-1)\n",
+ " base_negative_prompt_embeds = torch.cat((base_negative_prompt_embeds_1, base_negative_prompt_embeds_2), dim=-1)\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Text-to-Image Generation\n",
+ "\n",
+ "Now, we pass the embeddings and pooled prompts to the Stable Diffusion XL pipeline."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "image = pipe(\n",
+ " prompt_embeds=base_positive_prompt_embeds,\n",
+ " pooled_prompt_embeds=base_positive_prompt_pooled,\n",
+ " negative_prompt_embeds=base_negative_prompt_embeds,\n",
+ " negative_pooled_prompt_embeds=base_negative_prompt_pooled,\n",
+ " output_type=\"pil\",\n",
+ " num_inference_steps=config.num_inference_steps,\n",
+ " generator=generator,\n",
+ ").images[0]"
+ ]
+ },
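+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The config also defines `refiner_checkpoint`, `use_ensemble_of_experts`, `high_noise_fraction`, and `num_refinement_steps`, which the generation cell above does not consume. The next cell is a hedged sketch of how they could be wired up using the ensemble-of-experts scheme from the 🧨 Diffusers SDXL docs: the base model denoises the first `high_noise_fraction` of the schedule and returns latents, and the refiner finishes the remaining low-noise steps."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Hedged sketch: base + refiner as an ensemble of experts (relies on the config keys above).\n",
+ "if config.use_ensemble_of_experts:\n",
+ " refiner = DiffusionPipeline.from_pretrained(\n",
+ " config.refiner_checkpoint,\n",
+ " text_encoder_2=pipe.text_encoder_2, # share the second text encoder with the base\n",
+ " vae=pipe.vae, # share the VAE to save memory\n",
+ " torch_dtype=torch.float16,\n",
+ " variant=\"fp16\",\n",
+ " use_safetensors=True,\n",
+ " ).to(\"cuda\")\n",
+ "\n",
+ " # The base model denoises only the first high-noise fraction of the schedule\n",
+ " # and returns latents instead of a decoded image.\n",
+ " latents = pipe(\n",
+ " prompt_embeds=base_positive_prompt_embeds,\n",
+ " pooled_prompt_embeds=base_positive_prompt_pooled,\n",
+ " negative_prompt_embeds=base_negative_prompt_embeds,\n",
+ " negative_pooled_prompt_embeds=base_negative_prompt_pooled,\n",
+ " num_inference_steps=config.num_inference_steps,\n",
+ " denoising_end=config.high_noise_fraction,\n",
+ " output_type=\"latent\",\n",
+ " generator=generator,\n",
+ " ).images\n",
+ "\n",
+ " # The refiner resumes from the same point in the schedule and decodes the final image.\n",
+ " image = refiner(\n",
+ " prompt=config.prompt_1,\n",
+ " num_inference_steps=config.num_refinement_steps,\n",
+ " denoising_start=config.high_noise_fraction,\n",
+ " image=latents,\n",
+ " generator=generator,\n",
+ " ).images[0]"
+ ]
+ },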
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Logging the Images to Weights & Biases\n",
+ "\n",
+ "Now, we log the images to Weights & Biases. This enables us to:\n",
+ "\n",
+ "- Visualize our generations\n",
+ "- Examine the generated images across different images\n",
+ "- Ensure reproducibility of the experiments"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "table = wandb.Table(columns=[\n",
+ " \"Prompt-1\",\n",
+ " \"Prompt-2\",\n",
+ " \"Negative-Prompt-1\",\n",
+ " \"Negative-Prompt-2\",\n",
+ " \"Generated-Image\"\n",
+ "])\n",
+ "\n",
+ "image = wandb.Image(image)\n",
+ "\n",
+ "table.add_data(\n",
+ " config.prompt_1,\n",
+ " config.prompt_2,\n",
+ " config.negative_prompt_1,\n",
+ " config.negative_prompt_2,\n",
+ " image,\n",
+ ")\n",
+ "wandb.log({\n",
+ " \"Generated-Image\": image,\n",
+ " \"Text-to-Image\": table\n",
+ "})\n",
+ "wandb.finish()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In order to learn about how to use Stable Diffusion XL with the refiner pipeline from ๐งจ Diffusers and ๐ Weights & Biases, check out the following notebook ๐\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "language_info": {
+ "name": "python"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
From ca665f9f651e336dfeecf6a49404f9a24a18d8f3 Mon Sep 17 00:00:00 2001
From: Soumik Rakshit <19soumik.rakshit96@gmail.com>
Date: Thu, 3 Aug 2023 11:49:57 +0000
Subject: [PATCH 2/3] update: colab
---
colabs/README.md | 4 ++--
colabs/diffusers/sdxl-compel.ipynb | 10 +++++++++-
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/colabs/README.md b/colabs/README.md
index a1634221..f0d55bba 100644
--- a/colabs/README.md
+++ b/colabs/README.md
@@ -19,12 +19,12 @@
| Ultralytics Inference | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://wandb.me/ultralytics-inference) |
| Ray/Tune | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://wandb.me/raytune-colab) |
| 🤗 Diffusers | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://wandb.me/diffusers-uncond-colab) |
-| 🤗 Diffusers Stable Diffusion XL 1.0 Text-to-Image | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://wandb.me/sdxl-colab) |
+| 🧨 Diffusers Stable Diffusion XL 1.0 Text-to-Image | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://wandb.me/sdxl-colab) |
+| Controlling and Enhancing Stable Diffusion Prompts using Compel and 🧨 Diffusers | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://wandb.me/sdxl-compel-colab) |
| 🧨 Dreambooth-Keras Train | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://wandb.me/db-keras-train) |
| 🧨 Dreambooth-Keras Inference | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://wandb.me/db-keras-inference) |
| Kaolin-Wisp | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://wandb.me/vqad-colab) |
-
# 🏃🏽‍♀️ W&B Features
| Notebook | Link |
diff --git a/colabs/diffusers/sdxl-compel.ipynb b/colabs/diffusers/sdxl-compel.ipynb
index a54b18ff..ff317349 100644
--- a/colabs/diffusers/sdxl-compel.ipynb
+++ b/colabs/diffusers/sdxl-compel.ipynb
@@ -66,7 +66,6 @@
"config.refiner_checkpoint = \"stabilityai/stable-diffusion-xl-refiner-1.0\"\n",
"config.offload_to_cpu = False\n",
"config.compile_model = False\n",
- "# config.prompt = '(\"A highly detained photo\", \"a man with the head of an elephant\", \"a golden tiara on the head\", \"a snake wrapped around his belly\", \"The Himalayan mountains are the backdrop of the photo\", \"Realistic, cold and bright color grading, 8k.\").and(0.7, 1, 0.5, 0.9, 0.6, 0.9)'\n",
"config.prompt_1 = \"a cat playing with a ball in the (forest)---------\"\n",
"config.prompt_2 = \"Realistic, highly detailed, cold and bright color grading, 8k.\"\n",
"config.negative_prompt_1 = \"low-quality\"\n",
@@ -262,6 +261,15 @@
"wandb.finish()"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Here's how you can control your prompts using Compel and manage them using Weights & Biases ๐\n",
+ "\n",
+ "![](https://i.imgur.com/iUQH9XR.png)"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
From c44aac64dd708a1a3eed6c2a64e8368205e5e021 Mon Sep 17 00:00:00 2001
From: Thomas Capelle
Date: Thu, 3 Aug 2023 14:02:53 +0200
Subject: [PATCH 3/3] clean up
---
colabs/diffusers/sdxl-compel.ipynb | 30 ++++++++++++++++--------------
1 file changed, 16 insertions(+), 14 deletions(-)
diff --git a/colabs/diffusers/sdxl-compel.ipynb b/colabs/diffusers/sdxl-compel.ipynb
index ff317349..4920efd5 100644
--- a/colabs/diffusers/sdxl-compel.ipynb
+++ b/colabs/diffusers/sdxl-compel.ipynb
@@ -1,13 +1,18 @@
{
"cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Prompt Weighing and Blending using for SDXL 1.0 using [Compel](https://github.com/damian0815/compel) and [๐งจ Diffusers](https://huggingface.co/docs/diffusers)\n",
"\n",
- "\n",
- "\n",
"\n",
"This notebook demonstrates the following:\n",
"- Performing text-conditional image-generations using [๐งจ Diffusers](https://huggingface.co/docs/diffusers).\n",
@@ -269,22 +274,19 @@
"\n",
"![](https://i.imgur.com/iUQH9XR.png)"
]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "In order to learn about how to use Stable Diffusion XL with the refiner pipeline from ๐งจ Diffusers and ๐ Weights & Biases, check out the following notebook ๐\n",
- "\n",
- "\n"
- ]
}
],
"metadata": {
- "language_info": {
- "name": "python"
+ "accelerator": "GPU",
+ "colab": {
+ "include_colab_link": true,
+ "provenance": [],
+ "toc_visible": true
},
- "orig_nbformat": 4
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ }
},
"nbformat": 4,
"nbformat_minor": 2