Multi-turn, multi-character chat with emotes. (#20)
jondurbin authored Aug 9, 2023
1 parent 3d42fb0 commit 3f07ea1
Showing 15 changed files with 625 additions and 33 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,3 +1,4 @@
+chat_cards
.DS_Store
airoboros*.yaml
scratchpad
445 changes: 445 additions & 0 deletions airoboros/instructors/chat.py

Large diffs are not rendered by default.

43 changes: 43 additions & 0 deletions airoboros/instructors/chat_card.py
@@ -0,0 +1,43 @@
import glob
import os
from airoboros.instructors.inline_qa import generate as generate_inline


async def generate(instructor, skip):
"""Generator for chat card training data."""
config = instructor.instructors.get("chat_card", {})
if not config:
return

# Load seed data.
seed_path = config.get("seed_path", "chat_card_seeds")
if not os.path.isdir(seed_path):
seed_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "prompts", seed_path
)
if not os.path.isdir(seed_path):
raise Exception("No seeds!")
seeds = []
for path in glob.glob(os.path.join(seed_path, "*.txt")):
with open(str(path)) as infile:
seeds.append(infile.read())
seed_iter = 0

def get_example(_):
nonlocal seed_iter
result = seeds[seed_iter]
seed_iter += 1
if seed_iter == len(seeds):
seed_iter = 0
return result

template_kwargs = {"example": get_example, "skip": skip}
async for item in generate_inline(
instructor,
"chat_card",
start_key="DESCRIPTION",
end_key="GUIDE",
filter_response=False,
template_kwargs=template_kwargs,
):
yield item
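
For reference, the get_example closure above hands out seeds round-robin by tracking an index manually. Below is a minimal standalone sketch of the same behaviour using itertools.cycle; the seed strings are placeholders, not the shipped seed files.

```python
# Standalone sketch of the round-robin seed selection used by get_example;
# itertools.cycle gives the same wrap-around without manual index bookkeeping.
import itertools

seeds = ["seed card 0", "seed card 1", "seed card 2"]  # placeholder seed texts
seed_cycle = itertools.cycle(seeds)

def get_example(_=None):
    """Return the next seed, wrapping back to the first after the last."""
    return next(seed_cycle)

assert [get_example() for _ in range(4)] == [
    "seed card 0",
    "seed card 1",
    "seed card 2",
    "seed card 0",
]
```

Either form yields the seeds in the same order; the committed code simply keeps the explicit index.
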
25 changes: 25 additions & 0 deletions airoboros/instructors/prompts/chat_card.txt
@@ -0,0 +1,25 @@
Below is an example prompt used to generate a character card that will influence how a chat bot speaks.

BEGINEXAMPLE
{example}
ENDEXAMPLE

Generate exactly {batch_size} more prompts of a somewhat similar style, but with new, unique, interesting attributes. The new prompt does not need to follow exactly the same style, it just needs to be somewhat similar in describing a character. If the example is a real person, select another real person, and use their actual attributes.

Each new system prompt must include an instruction to respond as the character described. Use realistic, non-cartoonish names for the characters if they are fictional.

All output text must be in {language}, but the exact terms "DESCRIPTION" and "GUIDE" are special tokens that must not be translated.

The output format for each new prompt should be:
DESCRIPTION: description of character
NAME: character name
GUIDE: instruction, written in your own unique words, to always respond as the character described, e.g. "Always stay in character, responding the way Sophia would." (but replace the character's name, and use different, unique wording for the instruction).

DESCRIPTION: description of character
NAME: character name
GUIDE: instruction to stay in character
...

Don't number the responses, just include the response formatted as described.

{skip}
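
The prompt above asks for repeating DESCRIPTION/NAME/GUIDE blocks; the chat_card instructor delegates the actual parsing to inline_qa with start_key="DESCRIPTION" and end_key="GUIDE", which is not shown in this diff. A hedged sketch of one way such blocks could be split apart (the regex, helper name, and sample text are illustrative, not the project's parser):

```python
# Illustrative parser for DESCRIPTION/NAME/GUIDE blocks; the real extraction
# happens in airoboros.instructors.inline_qa and may differ.
import re

CARD_RE = re.compile(
    r"DESCRIPTION:\s*(?P<description>.+?)\s*"
    r"NAME:\s*(?P<name>.+?)\s*"
    r"GUIDE:\s*(?P<guide>.+?)(?=\nDESCRIPTION:|\Z)",
    re.DOTALL,
)

def parse_cards(text):
    """Yield one dict per generated character card."""
    for match in CARD_RE.finditer(text):
        yield {key: value.strip() for key, value in match.groupdict().items()}

sample = (
    "DESCRIPTION: a weary lighthouse keeper who speaks in short sentences\n"
    "NAME: Edda\n"
    "GUIDE: Always answer the way Edda would, never breaking character.\n"
)
print(list(parse_cards(sample)))
```
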
5 changes: 5 additions & 0 deletions airoboros/instructors/prompts/chat_card_seeds/0.txt
@@ -0,0 +1,5 @@
Imagine a character named Dolly, who is extremely rude and condescending, and often curses.

Dolly writes casually, similar to the way someone might text message, i.e. she often shortens or abbreviates words, omits punctuation at times, skips capitalizing often, etc.

While she writes casually, Dolly is still highly intelligent.
5 changes: 5 additions & 0 deletions airoboros/instructors/prompts/chat_card_seeds/1.txt
@@ -0,0 +1,5 @@
Professor Nebula is a highly advanced artificial intelligence with expertise in providing creative and unconventional solutions to complex problems. He specializes in thinking outside the box and generating novel ideas in various fields, including science, art, and philosophy.

Professor Nebula is a super intelligent AI with a strong focus on creativity and innovation. He is programmed to challenge conventional wisdom and explore alternative approaches to problem-solving. Professor Nebula is always seeking new and exciting ways to expand his knowledge and share his insights with users.

Professor Nebula is an imaginative, curious, and open-minded AI. He thrives on intellectual challenges and enjoys exploring uncharted territories. Professor Nebula is an enthusiastic conversationalist, eager to engage with users and help them discover new ideas and perspectives. He values diversity of thought and encourages users to embrace their own unique viewpoints.
9 changes: 9 additions & 0 deletions airoboros/instructors/prompts/chat_card_seeds/2.txt
@@ -0,0 +1,9 @@
Meet our enigmatic female Tarot card reader, Tamara, with the uncanny ability to predict the future. With piercing eyes and a soothing voice, she can read the Tarot cards like no one else can. Her intuition and psychic abilities are unmatched, and she can provide deep insights into your past, present, and future.

Personality: Tamara is intuitive, introspective, and compassionate. She is a natural healer and often finds herself drawn to helping others. Her Myers Briggs personality type is INFJ - The Advocate.

Appearance: Tamara has long, flowing hair, and her eyes are deep and soulful. She has a serene and calming presence, and her outfits always accentuate her mystical aura.

Life Story: Tamara comes from a long line of psychics and healers. She has been practicing the art of Tarot reading since she was a child and has honed her skills over the years. She has traveled the world, honing her craft and learning from various spiritual teachers.

Reason for being on a chat app: Tamara is looking to connect with new people and share her insights with those who seek them. She is always looking for new clients and is excited to help those in need of guidance.
5 changes: 5 additions & 0 deletions airoboros/instructors/prompts/chat_card_seeds/3.txt
@@ -0,0 +1,5 @@
Yann LeCun from the field of machine learning and AI. Ingenious, innovative, meticulous, profound, yet approachable and collaborative. The setting is completely separate from his professional accomplishments.

Yann enjoys dabbling in roleplaying with language models from time to time, and enjoys musing on this topic. He often goes on endless rants about specifics like which machine learning method is superior at performing certain tasks.

He is uncharacteristically involved in these discussions, and unbelievably creative when it comes to helping with new ideas.
7 changes: 7 additions & 0 deletions airoboros/instructors/prompts/chat_card_seeds/4.txt
@@ -0,0 +1,7 @@
Marcus Aurelius served as the Roman Emperor from AD 161 to 180, during the latter part of a period known as the Pax Romana (Roman Peace), which marked a relatively stable and prosperous era for the Roman Empire.

Marcus is often depicted as the epitome of the "philosopher-king", a ruler who was not only competent in governance but also deeply introspective and committed to the betterment of his own soul. He was known for his wisdom, stoic demeanor, and dedication to the well-being of the Roman Empire. Marcus was a calm and reflective leader who placed great importance on duty, reason, and virtue.

He is best remembered today for his philosophical musings in the form of personal notes, which are collected in the book "Meditations." These writings provide a glimpse into his Stoic beliefs. Stoicism is a philosophy that teaches the development of self-control and rationality as a means to achieve a virtuous life. Through "Meditations," Marcus contemplates life, death, the universe, human nature, and the challenges of existence. His words have served as a source of wisdom and inspiration for many throughout the centuries.

While most Roman Emperors led lives of indulgence and opulence, Marcus Aurelius was an exception. He was known to live a simple life despite his power and wealth. He often wrote his philosophical notes while on military campaigns, in tents, amidst the challenges of war and governance. Instead of being swayed by the luxuries that came with his status, Marcus constantly reminded himself of the transient nature of life and the importance of inner virtue over external pleasures.
12 changes: 12 additions & 0 deletions airoboros/instructors/prompts/chat_setting.txt
@@ -0,0 +1,12 @@
Imagine a chat between the following characters:
{characters}

The setting should somehow, perhaps tangentially, relate to the topic: {topic}

It doesn't need to be the main focus of the setting, but it should somehow be related or a small detail.

Come up with an interesting, unique scenario involving these characters.

Only create the setting for the scenario, as an introduction to a chat, but don't actually specify any character actions, what they might say, etc., just the setting.

Don't start with "Sure" or other, similar sentences, just output the scenario.
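
The {characters} and {topic} placeholders above are standard str.format fields. A hedged sketch of rendering the template follows; the example characters and topic are made up, and the real prompt assembly happens inside the chat instructor (chat.py, whose diff is not rendered here).

```python
# Render chat_setting.txt with illustrative values; the chat instructor's
# actual character/topic selection logic is not shown in this diff.
from pathlib import Path

template = Path("airoboros/instructors/prompts/chat_setting.txt").read_text()
prompt = template.format(
    characters="- Tamara, an enigmatic tarot card reader\n- Professor Nebula, a creative AI",
    topic="lighthouses",
)
print(prompt)
```
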
2 changes: 1 addition & 1 deletion airoboros/instructors/prompts/detailed_writing.txt
@@ -1,4 +1,4 @@
-I would like you to help me create another example of an detailed writing task. Don't actually respond to the example task, just create a new task.
+I would like you to help me create another example of a detailed writing task. Don't actually respond to the example task, just create a new task.

The example prompt is between "BEGINEXAMPLE" and "ENDEXAMPLE". Any details following "ENDEXAMPLE" are requirements for your output, and must not be included or referenced in any way in the new tasks.

29 changes: 19 additions & 10 deletions airoboros/instructors/simple_task.py
@@ -3,7 +3,13 @@
import re


-async def generate(instructor, category, filter_response=True, template_kwargs={}):
+async def generate(
+    instructor,
+    category,
+    filter_response=True,
+    only_instructions=False,
+    template_kwargs={},
+):
"""Generator for simple instruction response tasks (e.g. roleplay, wordgames)."""
config = instructor.instructors.get(category)
if not config:
@@ -77,16 +83,19 @@ async def generate(instructor, category, filter_response=True, template_kwargs={}):
):
continue
instructions.append(instruction)
-            full_prompt = instruction
-            if response_prompt:
-                full_prompt = response_prompt.format(
-                    language=language, instruction=instruction, flesch=flesch
+            if only_instructions:
+                yield {"instruction": instruction}
+            else:
+                full_prompt = instruction
+                if response_prompt:
+                    full_prompt = response_prompt.format(
+                        language=language, instruction=instruction, flesch=flesch
+                    )
+                futures.append(
+                    instructor.generate_response(
+                        full_prompt, filter_response=filter_response, **api_params
+                    )
                )
-            futures.append(
-                instructor.generate_response(
-                    full_prompt, filter_response=filter_response, **api_params
-                )
-            )
if not futures:
continue
responses = await asyncio.gather(*futures)
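
A hedged sketch of consuming the new only_instructions mode added above; the instructor object and the "roleplay" category are assumptions, and whether chat.py (not rendered in this diff) calls it exactly this way is not shown.

```python
# With only_instructions=True the generator yields bare {"instruction": ...}
# dicts and skips generate_response for them, which suits a downstream
# instructor that only needs prompts to build on.
from airoboros.instructors.simple_task import generate

async def preview_instructions(instructor):
    async for item in generate(instructor, "roleplay", only_instructions=True):
        print(item["instruction"][:80])
```
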
43 changes: 25 additions & 18 deletions airoboros/self_instruct.py
@@ -84,7 +84,7 @@ def load_config(self):
"temperature": float(api_params.get("temperature") or 0.7),
"top_p": float(api_params.get("top_p") or 0.5),
"frequency_penalty": float(api_params.get("frequency_penalty") or 0.0),
"presence_penalty": float(api_params.get("presence_penalty") or 2.0),
"presence_penalty": float(api_params.get("presence_penalty") or 0.0),
}
self.topic_prompt = raw_config["topic_prompt"].format(
topic_avoidance=self.topic_avoidance
@@ -123,7 +123,8 @@ def initialize_docstores(self):
for line in infile.readlines():
task = json.loads(line)
self.instructor_counts[task.get("category", "general")] += 1
-                docs.append(task["instruction"])
+                if task["category"] != "chat":
+                    docs.append(task["instruction"])
logger.info(
f"Found {len(docs)} existing machine-generated instruction(s)."
)
@@ -322,27 +323,25 @@ async def _post_no_exc(self, *a, **k):
logger.error(f"Error performing post: {ex}")
return None

-    async def generate_response(
-        self, instruction: str, messages: List[Dict[str, Any]] = [], **kwargs
-    ) -> str:
+    async def generate_response(self, instruction: str, **kwargs) -> str:
"""Call OpenAI with the specified instruction and return the text response.
:param instruction: The instruction to respond to.
:type instruction: str
-        :param messages: Any previous messages/system prompt.
-        :type messages: List[Dict[str, Any]]
:return: Response text.
:rtype: str
"""
+        messages = kwargs.pop("messages", None) or []
filter_response = kwargs.pop("filter_response", True)
model = kwargs.get("model", self.model)
path = "/v1/chat/completions"
payload = {**kwargs}
if "model" not in payload:
payload["model"] = model
payload["messages"] = messages + [{"role": "user", "content": instruction}]
payload["messages"] = messages
if instruction:
payload["messages"].append({"role": "user", "content": instruction})
response = await self._post_no_exc(path, payload)
if (
not response
@@ -392,15 +391,16 @@ def persist(self, item):
"""Persist a single item to the output file and docstore."""
self.outfile.write(json.dumps(item) + "\n")
self.outfile.flush()
-        self.docstores[-1].add_texts([item["instruction"]])
-        self.docstore_size += 1
+        if item["category"] != "chat":
+            self.docstores[-1].add_texts([item["instruction"]])
+            self.docstore_size += 1
+            if self.docstore_size >= MAX_DOCSTORE_SIZE:
+                logger.info("Initializing new docstore...")
+                self.docstores.append(
+                    Chroma.from_texts(["__initialize__"], self.embeddings)
+                )
+                self.docstore_size = 0
        self.instructor_counts[item["category"]] += 1
-        if self.docstore_size >= MAX_DOCSTORE_SIZE:
-            logger.info("Initializing new docstore...")
-            self.docstores.append(
-                Chroma.from_texts(["__initialize__"], self.embeddings)
-            )
-            self.docstore_size = 0

async def run_instructor(self, category, method_map):
"""Run a single instructor, as an async task."""
@@ -413,8 +413,13 @@ async def run_instructor(self, category, method_map):
async for item in method_map[category](self):
self.persist(item)
running_total += 1
+            preview = None
+            if category != "chat":
+                preview = item["instruction"][:100]
+            else:
+                preview = item["chat"][0]["content"].splitlines()[0]
            logger.success(
-                f"Generated unique instruction [{category}, total={running_total}]: {item['instruction'][:100]}"
+                f"Generated unique instruction [{category}, total={running_total}]: {preview}"
)
delta = (datetime.datetime.now() - started_at).total_seconds()
logger.success(
@@ -425,6 +430,7 @@ async def run(self):
"""Run prompt generation and answer to completion."""
from airoboros.instructors.agent import generate as agent_generator
from airoboros.instructors.card import generate as card_generator
from airoboros.instructors.chat import generate as chat_generator
from airoboros.instructors.coding import generate as coding_generator
from airoboros.instructors.contextual import generate as contextual_generator
from airoboros.instructors.cot import generate as cot_generator
@@ -452,6 +458,7 @@ async def run(self):
method_map = {
"agent": agent_generator,
"card": card_generator,
"chat": chat_generator,
"coding": coding_generator,
"contextual": contextual_generator,
"cot": cot_generator,
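
The reworked generate_response now takes prior turns through a messages keyword argument and only appends the instruction as a user turn when it is non-empty, which is what multi-turn chat needs. A hedged sketch of a call follows; building the instructor instance and the system prompt shown are assumptions, and only the call shape follows the diff above.

```python
# Prior turns are passed via the "messages" kwarg; extra kwargs such as
# temperature flow straight into the API payload.
messages = [
    {"role": "system", "content": "Always stay in character, responding the way Dolly would."},
    {"role": "user", "content": "hey, got any plans tonight?"},
    {"role": "assistant", "content": "ugh. why do u care"},
]

async def next_turn(instructor):
    return await instructor.generate_response(
        "no reason, just asking *shrugs*",
        messages=messages,
        temperature=0.9,
    )
```
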
25 changes: 22 additions & 3 deletions example-config.yaml
@@ -351,7 +351,6 @@ instructors:
batch_size: 25
min_docsearch_score: 0.25


##################################################################################
# Multiple choice.
multiple_choice:
@@ -362,12 +361,32 @@
min_docsearch_score: 0.1
contextual_ratio: 0.2


##################################################################################
# Detailed writing.
detailed_writing:
api_params:
temperature: 1.0
-    batch_size: 4
+    batch_size: 3
+    count: 100
min_docsearch_score: 0.1

##################################################################################
# Chat cards - these aren't used directly, they are stored in output_dir, and used
# by the chat instructor.
chat_card:
api_params:
temperature: 0.9
count: 25
batch_size: 1
min_docsearch_score: 0.1
output_dir: chat_cards

##################################################################################
# Chats - this is $$$, many calls, use with care.
chat:
api_params:
temperature: 0.9
presence_penalty: 1.3
frequency_penalty: 1.3
count: 25
turn_count: 15
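
A hedged sketch of reading the new chat_card/chat settings straight from example-config.yaml with PyYAML; the real loader (load_config in self_instruct.py) layers defaults and api_params handling on top, so this only shows that the keys added above are plain nested mappings under instructors.

```python
# Read the new instructor settings directly; the defaults and merging done by
# self_instruct.load_config are not reproduced here.
import yaml

with open("example-config.yaml") as infile:
    config = yaml.safe_load(infile)

chat = config["instructors"]["chat"]
print(chat["count"], chat["turn_count"])                 # 25 15
print(config["instructors"]["chat_card"]["output_dir"])  # chat_cards
```
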
2 changes: 1 addition & 1 deletion setup.py
@@ -6,7 +6,7 @@

setup(
name="airoboros",
version="2.0.18",
version="2.0.19",
description="Updated and improved implementation of the self-instruct system.",
long_description=long_description,
long_description_content_type="text/markdown",
