Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DO NOT MERGE] self learning GPTs #254

Open
wants to merge 29 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ This project supports a Docker-based setup, streamlining installation and execut
```

This command builds the Docker images for the frontend and backend from their respective Dockerfiles and starts all necessary services, including Postgres.

**NOTE**: Make sure your `docker compose version` is greater than or equal to v2.24.6. If you're using Docker Desktop you can just upgrade your Docker Desktop version. See here for more on [installing Docker Compose](https://docs.docker.com/compose/install/).


5. **Access the Application:**
With the services running, access the frontend at [http://localhost:5173](http://localhost:5173), substituting `5173` with the designated port number.
Expand Down
113 changes: 103 additions & 10 deletions backend/app/agent.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import random
from enum import Enum
from typing import Any, Mapping, Optional, Sequence, Union

Expand All @@ -7,6 +8,8 @@
RunnableBinding,
)
from langgraph.checkpoint import CheckpointAt
from langsmith import Client as LangSmithClient
from langsmith.schemas import Example

from app.agent_types.google_agent import get_google_agent_executor
from app.agent_types.openai_agent import get_openai_agent_executor
Expand Down Expand Up @@ -70,12 +73,65 @@ class AgentType(str, Enum):
CHECKPOINTER = PostgresCheckpoint(at=CheckpointAt.END_OF_STEP)


def _format_chat_example(example: Example) -> str:
    """Render a chat example as a few-shot prompt fragment.

    The first message in ``example.inputs["input"]`` is treated as the
    original user request; every later human message is surfaced as explicit
    feedback, and the stored output is presented as the desired final answer.
    """
    messages = example.inputs["input"]
    feedback = "".join(
        "<human_feedback>\n" + msg["content"] + "\n</human_feedback>\n"
        for msg in messages[1:]
        if msg["type"] == "human"
    )
    return (
        "<original_input>\n"
        f"{messages[0]['content']}\n"
        "</original_input>\n"
        f"{feedback}<output>\n"
        f"{example.outputs['output']['content']}\n"
        "</output>"
    )


def _format_agent_example(example: Example) -> str:
    """Render an agent run as a trajectory few-shot prompt fragment.

    Keeps the first recorded output message plus the trailing run of messages
    that follow the last human message (walking the outputs backwards and
    stopping at the first human message found).
    """
    outputs = example.outputs["output"]
    tail: list = []
    for message in reversed(outputs[1:]):
        if message["type"] == "human":
            break
        tail.append(message)
    tail.reverse()
    trajectory = [outputs[0]] + tail
    return f"<trajectory>\n{trajectory}\n</trajectory>"


def get_few_shot_str(assistant_id: str, *, agent: bool = False) -> str:
    """Build a few-shot prompt section from past liked interactions.

    Samples up to 5 examples from the LangSmith dataset named after the
    assistant and formats them either as agent trajectories or as chat
    input/feedback/output exchanges.

    Args:
        assistant_id: Assistant whose feedback dataset to read; doubles as the
            LangSmith dataset name.
        agent: Format examples as agent trajectories instead of plain chat.

    Returns:
        A prompt fragment containing the formatted examples, or "" when the
        dataset does not exist or is empty.
    """
    client = LangSmithClient()
    # Bug fix: previously this fell off the end and implicitly returned None
    # when the dataset did not exist, which made callers doing
    # `system_message += "\n\n" + get_few_shot_str(...)` raise a TypeError.
    if not client.has_dataset(dataset_name=assistant_id):
        return ""
    examples = list(client.list_examples(dataset_name=assistant_id))
    if not examples:
        return ""
    # TODO: Make this not random. Could be latest, could use some similarity
    # measure.
    examples = random.sample(examples, min(len(examples), 5))
    formatter = _format_agent_example if agent else _format_chat_example
    example_str = "\n".join(formatter(e) for e in examples)
    return f"""Here are some previous interactions with a user trying to accomplish a similar task. \
You should assume that the final output is the desired one, and any \
intermediate steps were wrong in some way, and the human then tried to improve upon \
them in specific ways. Learn from these previous interactions and do not repeat past \
mistakes!

{example_str}
"""


def get_agent_executor(
tools: list,
agent: AgentType,
system_message: str,
interrupt_before_action: bool,
*,
assistant_id: Optional[str] = None,
self_learning: bool = False,
):
if self_learning and assistant_id is not None:
system_message += "\n\n" + get_few_shot_str(assistant_id, agent=True)

if agent == AgentType.GPT_35_TURBO:
llm = get_openai_llm()
return get_openai_agent_executor(
Expand Down Expand Up @@ -119,6 +175,7 @@ class ConfigurableAgent(RunnableBinding):
assistant_id: Optional[str] = None
thread_id: Optional[str] = None
user_id: Optional[str] = None
self_learning: bool = False

def __init__(
self,
Expand All @@ -130,6 +187,7 @@ def __init__(
thread_id: Optional[str] = None,
retrieval_description: str = RETRIEVAL_DESCRIPTION,
interrupt_before_action: bool = False,
self_learning: bool = False,
kwargs: Optional[Mapping[str, Any]] = None,
config: Optional[Mapping[str, Any]] = None,
**others: Any,
Expand All @@ -153,7 +211,12 @@ def __init__(
else:
_tools.append(_returned_tools)
_agent = get_agent_executor(
_tools, agent, system_message, interrupt_before_action
_tools,
agent,
system_message,
interrupt_before_action,
assistant_id=assistant_id,
self_learning=self_learning,
)
agent_executor = _agent.with_config({"recursion_limit": 50})
super().__init__(
Expand All @@ -180,6 +243,9 @@ class LLMType(str, Enum):
def get_chatbot(
llm_type: LLMType,
system_message: str,
*,
assistant_id: Optional[str] = None,
self_learning: bool = False,
):
if llm_type == LLMType.GPT_35_TURBO:
llm = get_openai_llm()
Expand All @@ -197,26 +263,36 @@ def get_chatbot(
llm = get_mixtral_fireworks()
else:
raise ValueError("Unexpected llm type")

if self_learning and assistant_id:
system_message += "\n\n" + get_few_shot_str(assistant_id)

return get_chatbot_executor(llm, system_message, CHECKPOINTER)


class ConfigurableChatBot(RunnableBinding):
llm: LLMType
system_message: str = DEFAULT_SYSTEM_MESSAGE
user_id: Optional[str] = None
assistant_id: Optional[str] = None
self_learning: bool = False

def __init__(
self,
*,
llm: LLMType = LLMType.GPT_35_TURBO,
system_message: str = DEFAULT_SYSTEM_MESSAGE,
assistant_id: Optional[str] = None,
self_learning: bool = False,
kwargs: Optional[Mapping[str, Any]] = None,
config: Optional[Mapping[str, Any]] = None,
**others: Any,
) -> None:
others.pop("bound", None)

chatbot = get_chatbot(llm, system_message)
chatbot = get_chatbot(
llm, system_message, assistant_id=assistant_id, self_learning=self_learning
)
super().__init__(
llm=llm,
system_message=system_message,
Expand All @@ -231,6 +307,14 @@ def __init__(
.configurable_fields(
llm=ConfigurableField(id="llm_type", name="LLM Type"),
system_message=ConfigurableField(id="system_message", name="Instructions"),
assistant_id=ConfigurableField(
id="assistant_id", name="Assistant ID", is_shared=True
),
self_learning=ConfigurableField(
id="self_learning",
name="Self-learning",
description="A self-learning GPT is one that will learn from user feedback to improve over time.",
),
)
.with_types(input_type=Sequence[AnyMessage], output_type=Sequence[AnyMessage])
)
Expand Down Expand Up @@ -291,12 +375,14 @@ def __init__(
id="assistant_id", name="Assistant ID", is_shared=True
),
thread_id=ConfigurableField(id="thread_id", name="Thread ID", is_shared=True),
# TODO: Add support
# self_learning=ConfigurableField(id="self_learning", name="Self-learning")
)
.with_types(input_type=Sequence[AnyMessage], output_type=Sequence[AnyMessage])
)


agent = (
agent_w_tools = (
ConfigurableAgent(
agent=AgentType.GPT_35_TURBO,
tools=[],
Expand All @@ -321,17 +407,24 @@ def __init__(
retrieval_description=ConfigurableField(
id="retrieval_description", name="Retrieval Description"
),
)
.configurable_alternatives(
ConfigurableField(id="type", name="Bot Type"),
default_key="agent",
prefix_keys=True,
chatbot=chatbot,
chat_retrieval=chat_retrieval,
self_learning=ConfigurableField(
id="self_learning",
name="Self-learning",
description="A self-learning GPT is one that will learn from user feedback to improve over time.",
),
)
.with_types(input_type=Sequence[AnyMessage], output_type=Sequence[AnyMessage])
)


agent = agent_w_tools.configurable_alternatives(
ConfigurableField(id="type", name="Bot Type"),
default_key="agent",
prefix_keys=True,
chatbot=chatbot,
chat_retrieval=chat_retrieval,
).with_types(input_type=Sequence[AnyMessage], output_type=Sequence[AnyMessage])

if __name__ == "__main__":
import asyncio

Expand Down
43 changes: 42 additions & 1 deletion backend/app/api/assistants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from typing import Annotated, List, Optional
import os
from typing import Annotated, List, Literal, Optional
from uuid import uuid4

from fastapi import APIRouter, HTTPException, Path, Query
from langsmith import Client as LangSmithClient
from pydantic import BaseModel, Field

import app.storage as storage
Expand Down Expand Up @@ -68,13 +70,52 @@ async def create_assistant(
)


def _create_few_shot_dataset_and_rule(
    aid: AssistantID, assistant_type: Literal["agent", "chatbot"]
) -> None:
    """Create a LangSmith dataset for this assistant plus an automation rule
    that copies positively-rated runs for it into that dataset.

    Raises:
        ValueError: if ``assistant_type`` is neither "agent" nor "chatbot".
    """
    client = LangSmithClient()
    dataset = client.create_dataset(aid)
    # Runs qualify when they carry a "user_score" feedback of 1 and are tagged
    # with this assistant's id in their metadata.
    conditions = [
        ("feedback_key", '"user_score"'),
        ("feedback_score", 1),
        ("metadata_key", '"assistant_id"'),
        ("metadata_value", f'"{aid}"'),
    ]
    joined = ", ".join(f"eq({attr}, {val})" for attr, val in conditions)
    user_liked_filter = f"and({joined})"
    session_id = client.read_project(project_name=os.environ["LANGCHAIN_PROJECT"]).id
    payload = {
        "display_name": f"few shot {aid}",
        "session_id": str(session_id),
        "sampling_rate": 1,
        "add_to_dataset_id": str(dataset.id),
    }
    if assistant_type == "agent":
        payload["filter"] = user_liked_filter
    elif assistant_type == "chatbot":
        # For chatbots, match runs named "chatbot" and apply the liked-run
        # filter at the trace level.
        payload["filter"] = 'eq(name, "chatbot")'
        payload["trace_filter"] = user_liked_filter
    else:
        raise ValueError(
            f"Unknown assistant_type {assistant_type}. Expected 'agent' or 'chatbot'."
        )
    # NOTE(review): reaches into the private `_headers` attribute — the client
    # does not appear to expose a public rules API here; confirm on upgrade.
    client.request_with_retries(
        "POST",
        client.api_url + "/runs/rules",
        {"json": payload, "headers": client._headers},
    )


@router.put("/{aid}")
async def upsert_assistant(
opengpts_user_id: OpengptsUserId,
aid: AssistantID,
payload: AssistantPayload,
) -> Assistant:
"""Create or update an assistant."""
assistant_type = payload.config["configurable"]["type"]
if payload.config["configurable"][f"type=={assistant_type}/self_learning"]:
_create_few_shot_dataset_and_rule(aid, payload.config["configurable"]["type"])
return await storage.put_assistant(
opengpts_user_id,
aid,
Expand Down
4 changes: 2 additions & 2 deletions backend/app/checkpoint.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from datetime import datetime
import pickle
from datetime import datetime
from typing import AsyncIterator, Optional

from langchain_core.runnables import ConfigurableFieldSpec, RunnableConfig
from langgraph.checkpoint import BaseCheckpointSaver
from langgraph.checkpoint.base import Checkpoint, CheckpointTuple, CheckpointThreadTs
from langgraph.checkpoint.base import Checkpoint, CheckpointThreadTs, CheckpointTuple

from app.lifespan import get_pg_pool

Expand Down
2 changes: 1 addition & 1 deletion backend/app/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import os
from typing import Any, BinaryIO, List, Optional

from langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter
from langchain_community.document_loaders.blob_loaders.schema import Blob
from langchain_community.vectorstores.pgvector import PGVector
from langchain_core.runnables import (
Expand All @@ -21,6 +20,7 @@
)
from langchain_core.vectorstores import VectorStore
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter

from app.ingest import ingest_blob
from app.parsing import MIMETYPE_BASED_PARSER
Expand Down
29 changes: 23 additions & 6 deletions backend/tests/unit_tests/app/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,17 @@ async def test_list_and_create_assistants(pool: asyncpg.pool.Pool) -> None:
# Create an assistant
response = await client.put(
f"/assistants/{aid}",
json={"name": "bobby", "config": {}, "public": False},
json={
"name": "bobby",
"config": {"configurable": {"type": "agent", "self_learning": False}},
"public": False,
},
headers=headers,
)
assert response.status_code == 200
assert _project(response.json(), exclude_keys=["updated_at"]) == {
"assistant_id": aid,
"config": {},
"config": {"configurable": {"type": "agent", "self_learning": False}},
"name": "bobby",
"public": False,
"user_id": "1",
Expand All @@ -62,7 +66,7 @@ async def test_list_and_create_assistants(pool: asyncpg.pool.Pool) -> None:
assert [_project(d, exclude_keys=["updated_at"]) for d in response.json()] == [
{
"assistant_id": aid,
"config": {},
"config": {"configurable": {"type": "agent", "self_learning": False}},
"name": "bobby",
"public": False,
"user_id": "1",
Expand All @@ -71,13 +75,22 @@ async def test_list_and_create_assistants(pool: asyncpg.pool.Pool) -> None:

response = await client.put(
f"/assistants/{aid}",
json={"name": "bobby", "config": {}, "public": False},
json={
"name": "bobby",
"config": {
"configurable": {
"type": "chatbot",
"self_learning": False,
}
},
"public": False,
},
headers=headers,
)

assert _project(response.json(), exclude_keys=["updated_at"]) == {
"assistant_id": aid,
"config": {},
"config": {"configurable": {"type": "chatbot", "self_learning": False}},
"name": "bobby",
"public": False,
"user_id": "1",
Expand All @@ -99,7 +112,11 @@ async def test_threads() -> None:
async with get_client() as client:
response = await client.put(
f"/assistants/{aid}",
json={"name": "assistant", "config": {}, "public": False},
json={
"name": "assistant",
"config": {"configurable": {"type": "agent", "self_learning": False}},
"public": False,
},
headers=headers,
)

Expand Down
2 changes: 1 addition & 1 deletion frontend/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>OpenGPTs</title>
<title>Self Learning GPTs</title>
<link rel="stylesheet" href="https://rsms.me/inter/inter.css">
</head>
<body>
Expand Down
Loading
Loading