
Commit

Remove fast-sentence-transformers library, distinction between gpu requirements, misc fixes, model list update.
jondurbin committed Mar 7, 2024
1 parent 29aa8c7 commit 169e8a9
Showing 2 changed files with 19 additions and 19 deletions.
27 changes: 13 additions & 14 deletions airoboros/self_instruct.py
@@ -33,9 +33,9 @@
ContextLengthExceededError,
BadResponseError,
)
from fast_sentence_transformers import FastSentenceTransformer
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer
from txtai.pipeline import HFOnnx

# Defaults and constants.
MAX_DOCSTORE_SIZE = 15000
@@ -44,18 +44,20 @@

# List of OpenAI models we support (there are others, but skipping for now...)
OPENAI_MODELS = [
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-0301",
"gpt-3.5-turbo-16k-0613",
"gpt-3.5-turbo-0125",
"gpt-4-0314",
"gpt-4-0613",
"gpt-4",
"gpt-4-32k-0314",
"gpt-3.5-turbo",
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-0301",
"gpt-4-1106-preview",
"gpt-4",
"gpt-4-32k",
"gpt-4-0613",
"gpt-4-32k-0613",
"gpt-4-turbo-preview",
"gpt-3.5-turbo-1106",
"gpt-3.5-turbo",
"gpt-3.5-turbo-16k",
"gpt-4-0125-preview",
]

# Base URL for vertexai.
@@ -151,13 +153,10 @@ def load_config(self):
# Hacky, but we'll load this twice, the first time to get dimension, since
# it's not accessible in the Fast (cpu) version.
model = SentenceTransformer(model_name)
self.embedding_dimension = model.get_sentence_embedding_dimension()
model = None
if raw_config.get("embedding_device") == "cuda":
    self.embedding_model = SentenceTransformer(model_name, device="cuda")
else:
    self.embedding_model = FastSentenceTransformer(model_name, device="cpu")
device = raw_config.get("embedding_device", "cpu")
self.embedding_model = SentenceTransformer(model_name, device=device)
self.embedding_tokenizer = AutoTokenizer.from_pretrained(model_name)
self.embedding_dimension = self.embedding_model.get_sentence_embedding_dimension()
self.index = faiss.IndexFlatL2(self.embedding_dimension)

# Validate the model for each generator.
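For reference, the consolidated embedding setup after this change behaves roughly as sketched below. This is a minimal sketch, not code from the diff; the config dict and model name are hypothetical stand-ins.

import faiss
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer

raw_config = {"embedding_device": "cpu"}                 # hypothetical config
model_name = "sentence-transformers/all-MiniLM-L6-v2"    # hypothetical model name

# One SentenceTransformer instance now covers both CPU and GPU; the device
# comes straight from config and defaults to CPU.
device = raw_config.get("embedding_device", "cpu")
embedding_model = SentenceTransformer(model_name, device=device)
embedding_tokenizer = AutoTokenizer.from_pretrained(model_name)

# The dimension is read from the loaded model, so the old "load twice to get
# the dimension" workaround for the fast/ONNX variant is no longer needed.
embedding_dimension = embedding_model.get_sentence_embedding_dimension()
index = faiss.IndexFlatL2(embedding_dimension)

# Usage: encode a sentence and add it to the FAISS index.
vectors = embedding_model.encode(["hello world"])
index.add(vectors)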
11 changes: 6 additions & 5 deletions setup.py
@@ -6,7 +6,7 @@

setup(
name="airoboros",
version="2.2.1",
version="2.2.2",
description="Updated and improved implementation of the self-instruct system.",
long_description=long_description,
long_description_content_type="text/markdown",
@@ -24,16 +24,12 @@
install_requires=[
"aiohttp[speedups]>=3.8",
"backoff>=2.2",
"bitsandbytes>=0.40",
"requests>=2.28",
"loguru>=0.7",
"faiss-cpu==1.7.4",
"fast-sentence-transformers==0.4.1",
"sentence-transformers>=2.2.2",
"peft==0.4.0",
"fastapi>=0.101.0",
"uvicorn>=0.23.0",
"flash_attn==2.1.0",
"optimum==1.12.0",
"google-auth==2.25.1",
],
@@ -46,6 +42,11 @@
"fschat",
"vllm",
],
"gpu": [
"flash_attn>=2.4.2",
"bitsandbytes>=0.40",
"peft",
],
},
classifiers=[
"Development Status :: 4 - Beta",
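Note on the new "gpu" extra: the base install is now CPU-only, and the GPU-specific packages (flash_attn, bitsandbytes, peft) are pulled in only on request, for example with `pip install .[gpu]` from a checkout or `pip install airoboros[gpu]` from PyPI. The exact invocation here is illustrative, not taken from the repository docs.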
