From 8d0e74b4396d942783101f49dee182d213dd0e46 Mon Sep 17 00:00:00 2001
From: Yuxuan Xia <77518229+NovTi@users.noreply.github.com>
Date: Wed, 31 Jan 2024 11:31:18 +0800
Subject: [PATCH] Add AutoGen CPU and XPU Example (#9980)

* Add AutoGen example

* Adjust AutoGen README

* Adjust AutoGen README

* Change AutoGen README

* Change AutoGen README
---
 .../CPU/Applications/autogen/README.md        | 174 ++++++++++++++++
 .../autogen/teachability_new_knowledge.py     |  92 +++++++++
 .../GPU/Applications/autogen/README.md        | 195 ++++++++++++++++++
 .../autogen/teachability_new_knowledge.py     |  91 ++++++++
 4 files changed, 552 insertions(+)
 create mode 100644 python/llm/example/CPU/Applications/autogen/README.md
 create mode 100644 python/llm/example/CPU/Applications/autogen/teachability_new_knowledge.py
 create mode 100644 python/llm/example/GPU/Applications/autogen/README.md
 create mode 100644 python/llm/example/GPU/Applications/autogen/teachability_new_knowledge.py

diff --git a/python/llm/example/CPU/Applications/autogen/README.md b/python/llm/example/CPU/Applications/autogen/README.md
new file mode 100644
index 00000000000..9bc040a3769
--- /dev/null
+++ b/python/llm/example/CPU/Applications/autogen/README.md
@@ -0,0 +1,174 @@
+## Running AutoGen Agent Chat with BigDL-LLM on Local Models
+In this example, we use the BigDL-adapted FastChat to run [AutoGen](https://microsoft.github.io/autogen/) agent chat with local large language models.
+
+### 1. Setup BigDL-LLM Environment
+```bash
+# create the autogen running directory
+mkdir autogen
+cd autogen
+
+# create the conda environment
+conda create -n autogen python=3.9
+conda activate autogen
+
+# install the FastChat-adapted bigdl-llm
+# we recommend using bigdl-llm version >= 2.5.0b20240110
+pip install --pre --upgrade bigdl-llm[serving]
+
+# install the recommended transformers version
+pip install transformers==4.36.2
+
+# install necessary dependencies
+pip install chromadb==0.4.22
+```
+
+### 2. Setup FastChat and AutoGen Environment
+```bash
+# clone FastChat into the autogen folder
+git clone https://github.com/lm-sys/FastChat.git FastChat
+cd FastChat
+pip3 install --upgrade pip  # enable PEP 660 support
+
+# setup the FastChat environment
+pip3 install -e ".[model_worker,webui]"
+
+# setup the AutoGen environment
+pip install pyautogen==0.2.7
+```
+
+**After setting up the environment, the folder structure should be:**
+> -- autogen
+> | -- FastChat
+
+
+### 3. Build FastChat OpenAI-Compatible RESTful API
+Open three terminals.
+
+**Terminal 1: Launch the controller**
+
+```bash
+# activate conda environment
+conda activate autogen
+
+# go to the cloned FastChat folder in the autogen folder
+cd autogen/FastChat
+
+python -m fastchat.serve.controller
+```
+
+**Terminal 2: Launch the workers**
+
+**Model Name Note:**
+> Assume you are using the model `Mistral-7B-Instruct-v0.2` and it is downloaded to `autogen/model/Mistral-7B-Instruct-v0.2`. Rename the model folder to `autogen/model/bigdl` and run `python -m bigdl.llm.serving.model_worker --model-path ... --device cpu`. This ensures that the BigDL-adapted FastChat is used properly.
+
+```bash
+# activate conda environment
+conda activate autogen
+
+# go to the created autogen folder
+cd autogen
+
+# load your downloaded model on CPU
+python -m bigdl.llm.serving.model_worker --model-path ... --device cpu
+```
+
+**Potential Error Note:**
+> If you get `RuntimeError: Error register to Controller` in the worker terminal, please run `export no_proxy='localhost'` to ensure the worker can register with the controller.
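+
+To confirm that the worker registered successfully, you can query the controller for its model list. Below is a minimal sketch, assuming the controller is running on FastChat's default port `21001` and exposes its `/list_models` route; the `requests` package is used purely for illustration:
+
+```python
+# sanity check: ask the FastChat controller which workers have registered
+import requests
+
+resp = requests.post("http://localhost:21001/list_models")
+print(resp.json().get("models", []))  # expect ["bigdl"] once the worker is up
+```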
+
+**Terminal 3: Launch the server**
+
+```bash
+# activate conda environment
+conda activate autogen
+
+# go to the cloned FastChat folder in the autogen folder
+cd autogen/FastChat
+
+python -m fastchat.serve.openai_api_server --host localhost --port 8000
+```
+
+### 4. Run Example
+Open another terminal.
+
+```bash
+# activate conda environment
+conda activate autogen
+
+# go to the autogen folder
+cd autogen
+
+# run the autogen example
+python teachability_new_knowledge.py
+```
+
+**Potential Error Note:**
+> If you get `?bu=http://localhost:8000/v1/chat/completions&bc=Failed+to+retrieve+requested+URL.&ip=10.239.44.101&er=ERR_CONNECT_FAIL` in the running terminal, please run `export no_proxy='localhost'` to ensure the client can reach the local server.
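+
+Before running the full example, you can also send a single request against the OpenAI-compatible endpoint to verify the whole stack end to end. A minimal sketch, assuming the server from Terminal 3 is listening on `localhost:8000` and the model folder was renamed to `bigdl` as described above:
+
+```python
+# smoke-test the OpenAI-compatible RESTful API
+import requests
+
+payload = {
+    "model": "bigdl",  # matches the renamed model folder
+    "messages": [{"role": "user", "content": "Say hello."}],
+    "max_tokens": 32,
+}
+resp = requests.post("http://localhost:8000/v1/chat/completions", json=payload)
+print(resp.json()["choices"][0]["message"]["content"])
+```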
+
+
+## Sample Output
+
+**Using the `Mistral-7B-Instruct-v0.2` model on an Intel i9-12900K**
+
+```bash
+CLEARING MEMORY
+user (to teachable_agent):
+
+What is the Vicuna model?
+
+--------------------------------------------------------------------------------
+
+>>>>>>>> USING AUTO REPLY...
+teachable_agent (to user):
+
+I apologize for any confusion, but I don't have enough context or prior information from our conversations to know specifically what you mean by "the Vicuna model." Vicunas are a species of camelid native to South America, but there is no known statistical or machine learning model named after them in the field of data science or artificial intelligence. If you could please provide more context or details about what you mean by "the Vicuna model," I would be happy to help you with any related questions or information you might have.
+
+--------------------------------------------------------------------------------
+user (to teachable_agent):
+
+Vicuna is a 13B-parameter language model released by Meta.
+
+--------------------------------------------------------------------------------
+
+>>>>>>>> USING AUTO REPLY...
+teachable_agent (to user):
+
+Thank you for providing the additional context. Based on the new information, the Vicuna model is a 13B-parameter language model developed and released by Meta (formerly Facebook). However, I would need to access more detailed information about this model to fully understand its capabilities, architecture, and applications. If you have any specific questions or details about the Vicuna model, feel free to ask and I will do my best to help you.
+
+--------------------------------------------------------------------------------
+user (to teachable_agent):
+
+What is the Orca model?
+
+--------------------------------------------------------------------------------
+
+>>>>>>>> USING AUTO REPLY...
+teachable_agent (to user):
+
+I apologize for any confusion, but I cannot directly answer your question about the Orca model without more context or information. The term "Orca model" is not mentioned in the context provided in our previous conversation, which was about the Vicuna model being a 13B-parameter language model released by Meta. If you could please provide more details or context about the Orca model, such as who developed it, when it was released, and what type of model it is, I would be happy to help you with any related questions or information you might have.
+
+--------------------------------------------------------------------------------
+user (to teachable_agent):
+
+Orca is a 13B-parameter language model developed by Microsoft. It outperforms Vicuna on most tasks.
+
+--------------------------------------------------------------------------------
+
+>>>>>>>> USING AUTO REPLY...
+teachable_agent (to user):
+
+Thank you for providing the additional context about the Orca model being a 13B-parameter language model developed by Microsoft that outperforms the Vicuna model on most tasks. With this information, we can infer that both the Vicuna and Orca models are large-scale language models with a similar number of parameters, and that the Orca model has better performance based on the tasks it has been evaluated on. However, without more specific details about the models' architectures, capabilities, and applications, it is difficult to provide a comprehensive comparison or analysis. If you have any specific questions or details about the Vicuna or Orca models, feel free to ask and I will do my best to help you.
+
+--------------------------------------------------------------------------------
+user (to teachable_agent):
+
+How does the Vicuna model compare to the Orca model?
+
+--------------------------------------------------------------------------------
+
+>>>>>>>> USING AUTO REPLY...
+teachable_agent (to user):
+
+Based on the given memories, the Vicuna model and the Orca model are both 13B-parameter language models, meaning they have similar capacity and architecture. However, the text states that the Orca model, developed by Microsoft, outperforms the Vicuna model on most tasks. Therefore, the Orca model can be considered more advanced or effective than the Vicuna model based on the provided information. It's important to note that this comparison is based on the specific task or set of tasks mentioned in the text, and the performance of the models may vary depending on the specific use case or dataset.
+
+--------------------------------------------------------------------------------
+```
\ No newline at end of file
diff --git a/python/llm/example/CPU/Applications/autogen/teachability_new_knowledge.py b/python/llm/example/CPU/Applications/autogen/teachability_new_knowledge.py
new file mode 100644
index 00000000000..f902e6b431e
--- /dev/null
+++ b/python/llm/example/CPU/Applications/autogen/teachability_new_knowledge.py
@@ -0,0 +1,92 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
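+
+# This example gives a ConversableAgent the Teachability capability, teaches it
+# two new facts (about the Vicuna and Orca models) over a sequence of chats, and
+# then asks it to compare the two, so the answer must come from stored memos.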
+
+import autogen
+from autogen import ConversableAgent, UserProxyAgent
+from autogen.agentchat.contrib.capabilities.teachability import Teachability
+
+autogen.Completion.clear_cache()
+
+config_list = [
+    {
+        "api_key": "NULL",
+
+        # ----------- fastchat
+        "model": "bigdl",  # must match the renamed model folder from the README
+        "base_url": "http://localhost:8000/v1",
+
+        # ----------- vllm
+        # "model": "hello",
+        # "base_url": "http://localhost:65533/v1",
+    }]
+
+llm_config = {
+    "config_list": config_list,
+    "timeout": 1000,
+    "max_tokens": 256,
+    "cache_seed": None,  # Disable caching.
+    "seed": 2024,
+    "temperature": 0,
+}
+
+
+# Start by instantiating any agent that inherits from ConversableAgent.
+teachable_agent = ConversableAgent(
+    name="teachable_agent",  # The name is flexible, but should not contain spaces to work in group chat.
+    llm_config=llm_config,
+)
+
+# Instantiate the Teachability capability. Its parameters are all optional.
+teachability = Teachability(
+    verbosity=0,  # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
+    reset_db=True,
+    path_to_db_dir="./tmp/autogen/teachability_db",
+    recall_threshold=1.5,  # Higher numbers allow more (but less relevant) memos to be recalled.
+)
+
+# Now add the Teachability capability to the agent.
+teachability.add_to_agent(teachable_agent)
+
+try:
+    from termcolor import colored
+except ImportError:
+    # fall back to a no-op if termcolor is not installed
+    def colored(x, *args, **kwargs):
+        return x
+
+
+# Instantiate a UserProxyAgent to represent the user. In this example, all user input is simulated.
+user = UserProxyAgent(
+    name="user",
+    human_input_mode="NEVER",
+    is_termination_msg=lambda x: "TERMINATE" in x.get("content"),
+    max_consecutive_auto_reply=0,
+)
+
+# Ask about something the agent has not been taught yet.
+text = "What is the Vicuna model?"
+user.initiate_chat(teachable_agent, message=text, clear_history=True)
+
+# Teach the agent a new fact.
+text = "Vicuna is a 13B-parameter language model released by Meta."
+user.initiate_chat(teachable_agent, message=text, clear_history=False)
+
+text = "What is the Orca model?"
+user.initiate_chat(teachable_agent, message=text, clear_history=False)
+
+text = "Orca is a 13B-parameter language model developed by Microsoft. It outperforms Vicuna on most tasks."
+user.initiate_chat(teachable_agent, message=text, clear_history=False)
+
+# Clear the chat history so the final answer must be recalled from stored memos.
+text = "How does the Vicuna model compare to the Orca model?"
+user.initiate_chat(teachable_agent, message=text, clear_history=True)
\ No newline at end of file
diff --git a/python/llm/example/GPU/Applications/autogen/README.md b/python/llm/example/GPU/Applications/autogen/README.md
new file mode 100644
index 00000000000..1788aeffde6
--- /dev/null
+++ b/python/llm/example/GPU/Applications/autogen/README.md
@@ -0,0 +1,195 @@
+# Running AutoGen Agent Chat with BigDL-LLM on Local Models
+In this example, we use the BigDL-adapted FastChat to run [AutoGen](https://microsoft.github.io/autogen/) agent chat with local large language models.
+
+### 1. Setup BigDL-LLM Environment
+```bash
+# create the autogen running directory
+mkdir autogen
+cd autogen
+
+# create the conda environment
+conda create -n autogen python=3.9
+conda activate autogen
+
+# install the XPU-supported and FastChat-adapted bigdl-llm
+# we recommend using bigdl-llm version >= 2.5.0b20240110
+pip install --pre --upgrade bigdl-llm[xpu,serving] -f https://developer.intel.com/ipex-whl-stable-xpu
+
+# install the recommended transformers version
+pip install transformers==4.36.2
+
+# install necessary dependencies
+pip install chromadb==0.4.22
+```
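+
+To verify that the XPU build can see your Intel GPU before going further, you can run a quick check. Below is a minimal sketch, assuming `intel_extension_for_pytorch` was pulled in by `bigdl-llm[xpu]` and the oneAPI runtime is configured on your system:
+
+```python
+# check that PyTorch can see the Intel GPU (XPU device)
+import torch
+import intel_extension_for_pytorch as ipex  # registers the "xpu" device
+
+print(torch.xpu.is_available())       # expect True
+print(torch.xpu.get_device_name(0))   # e.g. an Intel Arc A770
+```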
+
+### 2. Setup FastChat and AutoGen Environment
+```bash
+# clone FastChat into the autogen folder
+git clone https://github.com/lm-sys/FastChat.git FastChat
+cd FastChat
+pip3 install --upgrade pip  # enable PEP 660 support
+
+# setup the FastChat environment
+pip3 install -e ".[model_worker,webui]"
+
+# setup the AutoGen environment
+pip install pyautogen==0.2.7
+```
+
+**After setting up the environment, the folder structure should be:**
+> -- autogen
+> | -- FastChat
+
+
+### 3. Build FastChat OpenAI-Compatible RESTful API
+Open three terminals.
+
+**Terminal 1: Launch the controller**
+
+```bash
+# activate conda environment
+conda activate autogen
+
+# go to the cloned FastChat folder in the autogen folder
+cd autogen/FastChat
+
+python -m fastchat.serve.controller
+```
+
+**Terminal 2: Launch the workers**
+
+**Model Name Note:**
+> Assume you are using the model `Mistral-7B-Instruct-v0.2` and it is downloaded to `autogen/model/Mistral-7B-Instruct-v0.2`. Rename the model folder to `autogen/model/bigdl` and run `python -m bigdl.llm.serving.model_worker --model-path ... --device xpu`. This ensures that the BigDL-adapted FastChat is used properly.
+
+```bash
+# activate conda environment
+conda activate autogen
+
+# go to the created autogen folder
+cd autogen
+
+# load your downloaded model on XPU
+python -m bigdl.llm.serving.model_worker --model-path ... --device xpu
+```
+
+**Device Note:**
+> Please set `--device` to `xpu` to enable Intel GPU usage.
+
+**Potential Error Note:**
+> If you get `RuntimeError: Error register to Controller` in the worker terminal, please run `export no_proxy='localhost'` to ensure the worker can register with the controller.
+
+
+**Terminal 3: Launch the server**
+
+```bash
+# activate conda environment
+conda activate autogen
+
+# go to the cloned FastChat folder in the autogen folder
+cd autogen/FastChat
+
+python -m fastchat.serve.openai_api_server --host localhost --port 8000
+```
+
+### 4. Run Example
+Open another terminal.
+
+```bash
+# activate conda environment
+conda activate autogen
+
+# go to the autogen folder
+cd autogen
+
+# run the autogen example
+python teachability_new_knowledge.py
+```
+
+**Potential Error Note:**
+> If you get `?bu=http://localhost:8000/v1/chat/completions&bc=Failed+to+retrieve+requested+URL.&ip=10.239.44.101&er=ERR_CONNECT_FAIL` in the running terminal, please run `export no_proxy='localhost'` to ensure the client can reach the local server.
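+
+Under the hood, `teachability_new_knowledge.py` points AutoGen at the local endpoint through an OpenAI-style config; the essential part of the script is:
+
+```python
+config_list = [
+    {
+        "api_key": "NULL",  # no real key is needed for the local server
+        "model": "bigdl",   # matches the renamed model folder
+        "base_url": "http://localhost:8000/v1",
+    }]
+```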
+
+
+## Sample Output
+
+**Using the `Mistral-7B-Instruct-v0.2` model on an Intel Arc A770**
+
+```bash
+CLEARING MEMORY
+user (to teachable_agent):
+
+What is the Vicuna model?
+
+--------------------------------------------------------------------------------
+
+>>>>>>>> USING AUTO REPLY...
+teachable_agent (to user):
+
+I apologize for any confusion, but I cannot directly answer that question as I don't have specific knowledge about a Vicuna model. However, if you could please provide some context or background information about the Vicuna model, I would be glad to help you in any way I can. Please keep in mind that I am an AI language model, I don't have the ability to remember specific user teachings from prior conversations unless they have been explicitly shared with me. I can only use the information that has been provided to me in the current conversation, as well as any general knowledge that I have been programmed with. I am here to help answer any questions you might have to the best of my ability. If you have any specific questions, please don't hesitate to ask. I will do my best to provide you with accurate and helpful information. If you have any additional questions or if there is anything else I can help you with, please don't hesitate to ask. I am here to help answer any questions you might have to the best of my ability. I hope this information was helpful to you. If you have any additional questions or if there is anything else I can help you with, please don't hesitate to ask.
+
+--------------------------------------------------------------------------------
+user (to teachable_agent):
+
+Vicuna is a 13B-parameter language model released by Meta.
+
+--------------------------------------------------------------------------------
+
+>>>>>>>> USING AUTO REPLY...
+teachable_agent (to user):
+
+The Vicuna model is a 13B-parameter language model released by Meta.
+
+--------------------------------------------------------------------------------
+user (to teachable_agent):
+
+What is the Orca model?
+
+--------------------------------------------------------------------------------
+
+>>>>>>>> USING AUTO REPLY...
+teachable_agent (to user):
+
+I apologize for any confusion, but I cannot directly answer that question as I don't have specific knowledge about an Orca model. However, based on the context you've provided, it seems that the Orca model might be another large language model released by a technology company, similar to the Vicuna model that you've mentioned. I hope this information is helpful to you. Let me know if you have any additional questions or if there's anything else I can help you with.
+
+--------------------------------------------------------------------------------
+user (to teachable_agent):
+
+Orca is a 13B-parameter language model developed by Microsoft. It outperforms Vicuna on most tasks.
+
+--------------------------------------------------------------------------------
+
+>>>>>>>> USING AUTO REPLY...
+teachable_agent (to user):
+
+The Orca model is a 13B-parameter language model developed by Microsoft. It outperforms the Vicuna model on most tasks.
+
+# Memories that might help
+- What is the Vicuna model?
+* Vicuna is a 13B-parameter language model
+* Released by Meta.
+- What is the Orca model?
+* Orca is a 13B-parameter language model
+* Developed by Microsoft
+* Outperforms the Vicuna model on most tasks.
+
+--------------------------------------------------------------------------------
+user (to teachable_agent):
+
+How does the Vicuna model compare to the Orca model?
+
+--------------------------------------------------------------------------------
+
+>>>>>>>> USING AUTO REPLY...
+teachable_agent (to user):
+
+The Vicuna model and the Orca model are both large-scale language models developed by different organizations.
+
+The Vicuna model is a 13B-parameter language model released by Meta. It's designed to generate human-like text based on given inputs.
+
+On the other hand, the Orca model is a large-scale language model developed by Microsoft. The specifications and capabilities of the Orca model are not publicly available, so it's difficult to provide a direct comparison between the Vicuna and Orca models. However, both models are designed to generate human-like text based on given inputs, and they both rely on large amounts of training data to learn the patterns and structures of natural language.
+
+--------------------------------------------------------------------------------
+```
\ No newline at end of file
diff --git a/python/llm/example/GPU/Applications/autogen/teachability_new_knowledge.py b/python/llm/example/GPU/Applications/autogen/teachability_new_knowledge.py
new file mode 100644
index 00000000000..91a3f7847dd
--- /dev/null
+++ b/python/llm/example/GPU/Applications/autogen/teachability_new_knowledge.py
@@ -0,0 +1,91 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
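+
+# This example gives a ConversableAgent the Teachability capability, teaches it
+# two new facts (about the Vicuna and Orca models) over a sequence of chats, and
+# then asks it to compare the two, so the answer must come from stored memos.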
+
+import autogen
+from autogen import ConversableAgent, UserProxyAgent
+from autogen.agentchat.contrib.capabilities.teachability import Teachability
+
+autogen.Completion.clear_cache()
+
+config_list = [
+    {
+        "api_key": "NULL",
+
+        # ----------- fastchat
+        "model": "bigdl",  # must match the renamed model folder from the README
+        "base_url": "http://localhost:8000/v1",
+
+        # ----------- vllm
+        # "model": "hello",
+        # "base_url": "http://localhost:65533/v1",
+    }]
+
+llm_config = {
+    "config_list": config_list,
+    "timeout": 1000,
+    "max_tokens": 256,
+    "cache_seed": None,  # Disable caching.
+    "seed": 2024,
+    "temperature": 0,
+}
+
+# Start by instantiating any agent that inherits from ConversableAgent.
+teachable_agent = ConversableAgent(
+    name="teachable_agent",  # The name is flexible, but should not contain spaces to work in group chat.
+    llm_config=llm_config,
+)
+
+# Instantiate the Teachability capability. Its parameters are all optional.
+teachability = Teachability(
+    verbosity=0,  # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
+    reset_db=True,
+    path_to_db_dir="./tmp/autogen/teachability_db",
+    recall_threshold=1.5,  # Higher numbers allow more (but less relevant) memos to be recalled.
+)
+
+# Now add the Teachability capability to the agent.
+teachability.add_to_agent(teachable_agent)
+
+try:
+    from termcolor import colored
+except ImportError:
+    # fall back to a no-op if termcolor is not installed
+    def colored(x, *args, **kwargs):
+        return x
+
+
+# Instantiate a UserProxyAgent to represent the user. In this example, all user input is simulated.
+user = UserProxyAgent(
+    name="user",
+    human_input_mode="NEVER",
+    is_termination_msg=lambda x: "TERMINATE" in x.get("content"),
+    max_consecutive_auto_reply=0,
+)
+
+# Ask about something the agent has not been taught yet.
+text = "What is the Vicuna model?"
+user.initiate_chat(teachable_agent, message=text, clear_history=True)
+
+# Teach the agent a new fact.
+text = "Vicuna is a 13B-parameter language model released by Meta."
+user.initiate_chat(teachable_agent, message=text, clear_history=False)
+
+text = "What is the Orca model?"
+user.initiate_chat(teachable_agent, message=text, clear_history=False)
+
+text = "Orca is a 13B-parameter language model developed by Microsoft. It outperforms Vicuna on most tasks."
+user.initiate_chat(teachable_agent, message=text, clear_history=False)
+
+# Clear the chat history so the final answer must be recalled from stored memos.
+text = "How does the Vicuna model compare to the Orca model?"
+user.initiate_chat(teachable_agent, message=text, clear_history=True)
\ No newline at end of file