
Merge branch 'nomic-ai:main' into main
abdulrahman305 authored Jan 19, 2024
2 parents 0e20f61 + a9c5f53 commit 5ccb2bf
Showing 109 changed files with 4,617 additions and 1,373 deletions.
1 change: 1 addition & 0 deletions .circleci/config.yml
@@ -11,6 +11,7 @@ workflows:
base-revision: main
config-path: .circleci/continue_config.yml
mapping: |
.circleci/.* run-all-workflows true
gpt4all-bindings/python/.* run-python-workflow true
gpt4all-bindings/typescript/.* run-ts-workflow true
gpt4all-bindings/csharp/.* run-csharp-workflow true
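The new first mapping line means any change under `.circleci/` now sets `run-all-workflows`, which the continuation config (below) checks in every workflow's `when` clause. As an illustrative sketch only — not the path-filtering orb's actual implementation — the mapping behaves roughly like this regex-to-parameter match in Python:

```python
import re

# Mapping rows copied from .circleci/config.yml: (regex, parameter, value)
MAPPING = [
    (r".circleci/.*", "run-all-workflows", True),
    (r"gpt4all-bindings/python/.*", "run-python-workflow", True),
    (r"gpt4all-bindings/typescript/.*", "run-ts-workflow", True),
    (r"gpt4all-bindings/csharp/.*", "run-csharp-workflow", True),
]

def parameters_for(changed_files):
    """Collect the pipeline parameters implied by a list of changed paths."""
    params = {}
    for path in changed_files:
        for pattern, name, value in MAPPING:
            if re.fullmatch(pattern, path):
                params[name] = value
    return params

print(parameters_for([".circleci/continue_config.yml"]))
# {'run-all-workflows': True}
```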
61 changes: 45 additions & 16 deletions .circleci/continue_config.yml
@@ -5,6 +5,9 @@ orbs:
node: circleci/node@5.1

parameters:
run-all-workflows:
type: boolean
default: false
run-default-workflow:
type: boolean
default: false
@@ -287,6 +290,7 @@ jobs:
$Env:INCLUDE = "${Env:INCLUDE};C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\VS\include"
$Env:INCLUDE = "${Env:INCLUDE};C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.29.30133\include"
$Env:INCLUDE = "${Env:INCLUDE};C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.29.30133\ATLMFC\include"
$Env:VULKAN_SDK = "C:\VulkanSDK\1.3.261.1"
mkdir build
cd build
& "C:\Qt\Tools\CMake_64\bin\cmake.exe" `
@@ -348,6 +352,7 @@ jobs:
install-yarn: true
node-version: "18.16"
- run: node --version
- run: corepack enable
- node/install-packages:
pkg-manager: yarn
app-dir: gpt4all-bindings/typescript
@@ -482,8 +487,9 @@ jobs:
cd gpt4all-backend
mkdir build
cd build
$env:Path += ";C:\ProgramData\mingw64\mingw64\bin"
$env:Path += ";C:\VulkanSDK\1.3.261.1\bin"
$Env:Path += ";C:\ProgramData\mingw64\mingw64\bin"
$Env:Path += ";C:\VulkanSDK\1.3.261.1\bin"
$Env:VULKAN_SDK = "C:\VulkanSDK\1.3.261.1"
cmake -G "MinGW Makefiles" .. -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DKOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER=OFF
cmake --build . --parallel
- run:
@@ -853,6 +859,7 @@ jobs:
install-yarn: true
node-version: "18.16"
- run: node --version
- run: corepack enable
- node/install-packages:
app-dir: gpt4all-bindings/typescript
pkg-manager: yarn
@@ -883,6 +890,7 @@ jobs:
install-yarn: true
node-version: "18.16"
- run: node --version
- run: corepack enable
- node/install-packages:
app-dir: gpt4all-bindings/typescript
pkg-manager: yarn
@@ -895,14 +903,14 @@ jobs:
name: "Persisting all necessary things to workspace"
command: |
mkdir -p gpt4all-backend/prebuilds/darwin-x64
mkdir -p gpt4all-backend/runtimes/darwin-x64
cp /tmp/gpt4all-backend/runtimes/osx-x64/*-*.* gpt4all-backend/runtimes/darwin-x64
mkdir -p gpt4all-backend/runtimes/darwin
cp /tmp/gpt4all-backend/runtimes/osx-x64/*-*.* gpt4all-backend/runtimes/darwin
cp gpt4all-bindings/typescript/prebuilds/darwin-x64/*.node gpt4all-backend/prebuilds/darwin-x64
- persist_to_workspace:
root: gpt4all-backend
paths:
- prebuilds/darwin-x64/*.node
- runtimes/darwin-x64/*-*.*
- runtimes/darwin/*-*.*

build-nodejs-windows:
executor:
@@ -924,6 +932,7 @@ jobs:
nvm install 18.16.0
nvm use 18.16.0
- run: node --version
- run: corepack enable
- run:
command: |
npm install -g yarn
@@ -957,6 +966,7 @@ jobs:
install-yarn: true
node-version: "18.16"
- run: node --version
- run: corepack enable
- run:
command: |
cd gpt4all-bindings/typescript
@@ -971,9 +981,12 @@ jobs:
cp /tmp/gpt4all-backend/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/
cp /tmp/gpt4all-backend/prebuilds/linux-x64/*.node prebuilds/linux-x64/
mkdir -p runtimes/darwin-x64/native
# darwin has universal runtime libraries
mkdir -p runtimes/darwin/native
mkdir -p prebuilds/darwin-x64/
cp /tmp/gpt4all-backend/runtimes/darwin-x64/*-*.* runtimes/darwin-x64/native/
cp /tmp/gpt4all-backend/runtimes/darwin/*-*.* runtimes/darwin/native/
cp /tmp/gpt4all-backend/prebuilds/darwin-x64/*.node prebuilds/darwin-x64/
# Fallback build if user is not on above prebuilds
@@ -1001,11 +1014,17 @@ jobs:
workflows:
version: 2
default:
when: << pipeline.parameters.run-default-workflow >>
when:
or:
- << pipeline.parameters.run-all-workflows >>
- << pipeline.parameters.run-default-workflow >>
jobs:
- default-job
build-chat-offline-installers:
when: << pipeline.parameters.run-chat-workflow >>
when:
or:
- << pipeline.parameters.run-all-workflows >>
- << pipeline.parameters.run-chat-workflow >>
jobs:
- hold:
type: approval
@@ -1019,7 +1038,10 @@ workflows:
requires:
- hold
build-and-test-gpt4all-chat:
when: << pipeline.parameters.run-chat-workflow >>
when:
or:
- << pipeline.parameters.run-all-workflows >>
- << pipeline.parameters.run-chat-workflow >>
jobs:
- hold:
type: approval
@@ -1033,7 +1055,10 @@ workflows:
requires:
- hold
deploy-docs:
when: << pipeline.parameters.run-python-workflow >>
when:
or:
- << pipeline.parameters.run-all-workflows >>
- << pipeline.parameters.run-python-workflow >>
jobs:
- build-ts-docs:
filters:
@@ -1046,7 +1071,10 @@ workflows:
only:
- main
build-py-deploy:
when: << pipeline.parameters.run-python-workflow >>
when:
or:
- << pipeline.parameters.run-all-workflows >>
- << pipeline.parameters.run-python-workflow >>
jobs:
- pypi-hold:
type: approval
@@ -1081,10 +1109,11 @@ workflows:
- build-py-macos
build-bindings:
when:
or:
- << pipeline.parameters.run-python-workflow >>
- << pipeline.parameters.run-csharp-workflow >>
- << pipeline.parameters.run-ts-workflow >>
or:
- << pipeline.parameters.run-all-workflows >>
- << pipeline.parameters.run-python-workflow >>
- << pipeline.parameters.run-csharp-workflow >>
- << pipeline.parameters.run-ts-workflow >>
jobs:
- hold:
type: approval
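Taken together, the `continue_config.yml` changes declare one new boolean parameter, `run-all-workflows`, and wrap every workflow's `when` condition in an `or` so that one flag can force all workflows on at once. A sketch of triggering a pipeline with that parameter through the CircleCI v2 API — the project slug and token are placeholders, and this assumes the parameter is accepted for API-triggered pipelines:

```python
import requests

PROJECT_SLUG = "gh/nomic-ai/gpt4all"  # placeholder
CIRCLE_TOKEN = "your-api-token"       # placeholder

resp = requests.post(
    f"https://circleci.com/api/v2/project/{PROJECT_SLUG}/pipeline",
    headers={"Circle-Token": CIRCLE_TOKEN},
    json={
        "branch": "main",
        # The boolean added in this commit; every workflow's `when: or:`
        # clause now also checks it.
        "parameters": {"run-all-workflows": True},
    },
)
resp.raise_for_status()
print(resp.json()["number"])  # pipeline number
```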
3 changes: 1 addition & 2 deletions .github/ISSUE_TEMPLATE/config.yml
@@ -1,2 +1 @@
blank_issues_enabled: false
version: 2.1
version: 2.1
5 changes: 4 additions & 1 deletion .gitignore
@@ -183,4 +183,7 @@ build_*
build-*

# IntelliJ
.idea/
.idea/

# LLM models
*.gguf
5 changes: 4 additions & 1 deletion gpt4all-api/docker-compose.yaml
@@ -7,13 +7,16 @@ services:
restart: always #restart on error (usually code compilation from save during bad state)
ports:
- "4891:4891"
env_file:
- .env
environment:
- APP_ENVIRONMENT=dev
- WEB_CONCURRENCY=2
- LOGLEVEL=debug
- PORT=4891
- model=ggml-mpt-7b-chat.bin
- model=${MODEL_BIN} # using variable from .env file
- inference_mode=cpu
volumes:
- './gpt4all_api/app:/app'
- './gpt4all_api/models:/models' # models are mounted in the container
command: ["/start-reload.sh"]
6 changes: 0 additions & 6 deletions gpt4all-api/gpt4all_api/Dockerfile.buildkit
@@ -1,8 +1,6 @@
# syntax=docker/dockerfile:1.0.0-experimental
FROM tiangolo/uvicorn-gunicorn:python3.11

ARG MODEL_BIN=ggml-mpt-7b-chat.bin

# Put first so anytime this file changes other cached layers are invalidated.
COPY gpt4all_api/requirements.txt /requirements.txt

@@ -17,7 +15,3 @@ COPY gpt4all_api/app /app

RUN mkdir -p /models

# Include the following line to bake a model into the image and not have to download it on API start.
RUN wget -q --show-progress=off https://gpt4all.io/models/${MODEL_BIN} -P /models \
&& md5sum /models/${MODEL_BIN}

Three empty files (paths not shown).
54 changes: 34 additions & 20 deletions gpt4all-api/gpt4all_api/app/api_v1/routes/chat.py
@@ -1,39 +1,35 @@
import logging
import time
from typing import Dict, List

from api_v1.settings import settings
from fastapi import APIRouter, Depends, Response, Security, status
from typing import List
from uuid import uuid4
from fastapi import APIRouter
from pydantic import BaseModel, Field
from api_v1.settings import settings
from fastapi.responses import StreamingResponse

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml


class ChatCompletionMessage(BaseModel):
role: str
content: str


class ChatCompletionRequest(BaseModel):
model: str = Field(..., description='The model to generate a completion from.')
messages: List[ChatCompletionMessage] = Field(..., description='The model to generate a completion from.')

model: str = Field(settings.model, description='The model to generate a completion from.')
messages: List[ChatCompletionMessage] = Field(..., description='Messages for the chat completion.')

class ChatCompletionChoice(BaseModel):
message: ChatCompletionMessage
index: int
logprobs: float
finish_reason: str


class ChatCompletionUsage(BaseModel):
prompt_tokens: int
completion_tokens: int
total_tokens: int


class ChatCompletionResponse(BaseModel):
id: str
object: str = 'text_completion'
@@ -42,20 +38,38 @@ class ChatCompletionResponse(BaseModel):
choices: List[ChatCompletionChoice]
usage: ChatCompletionUsage


router = APIRouter(prefix="/chat", tags=["Completions Endpoints"])


@router.post("/completions", response_model=ChatCompletionResponse)
async def chat_completion(request: ChatCompletionRequest):
'''
Completes a GPT4All model response.
Completes a GPT4All model response based on the last message in the chat.
'''
# Example: Echo the last message content with some modification
if request.messages:
last_message = request.messages[-1].content
response_content = f"Echo: {last_message}"
else:
response_content = "No messages received."

# Create a chat message for the response
response_message = ChatCompletionMessage(role="system", content=response_content)

# Create a choice object with the response message
response_choice = ChatCompletionChoice(
message=response_message,
index=0,
logprobs=-1.0, # Placeholder value
finish_reason="length" # Placeholder value
)

return ChatCompletionResponse(
id='asdf',
created=time.time(),
# Create the response object
chat_response = ChatCompletionResponse(
id=str(uuid4()),
created=int(time.time()),
model=request.model,
choices=[{}],
usage={'prompt_tokens': 0, 'completion_tokens': 0, 'total_tokens': 0},
choices=[response_choice],
usage=ChatCompletionUsage(prompt_tokens=0, completion_tokens=0, total_tokens=0), # Placeholder values
)

return chat_response
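The rewritten handler echoes the last message back with a fresh UUID and an integer timestamp instead of raising on placeholder values. A quick way to exercise it from a client — port 4891 comes from the compose file; the `/v1` mount prefix is an assumption:

```python
import requests

resp = requests.post(
    "http://localhost:4891/v1/chat/completions",  # mount prefix assumed
    json={
        "model": "ggml-mpt-7b-chat.bin",
        "messages": [{"role": "user", "content": "Hello!"}],
    },
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
# "Echo: Hello!"
```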
51 changes: 25 additions & 26 deletions gpt4all-api/gpt4all_api/app/api_v1/routes/engines.py
@@ -1,40 +1,39 @@
import logging
from typing import Dict, List

from api_v1.settings import settings
from fastapi import APIRouter, Depends, Response, Security, status
import requests
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel, Field
from typing import List, Dict

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

### This should follow https://github.com/openai/openai-openapi/blob/master/openapi.yaml

# Define the router for the engines module
router = APIRouter(prefix="/engines", tags=["Search Endpoints"])

# Define the models for the engines module
class ListEnginesResponse(BaseModel):
data: List[Dict] = Field(..., description="All available models.")


class EngineResponse(BaseModel):
data: List[Dict] = Field(..., description="All available models.")


router = APIRouter(prefix="/engines", tags=["Search Endpoints"])


# Define the routes for the engines module
@router.get("/", response_model=ListEnginesResponse)
async def list_engines():
'''
List all available GPT4All models from
https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/metadata/models2.json
'''
raise NotImplementedError()
return ListEnginesResponse(data=[])


try:
response = requests.get('https://raw.githubusercontent.com/nomic-ai/gpt4all/main/gpt4all-chat/metadata/models2.json')
response.raise_for_status() # This will raise an HTTPError if the HTTP request returned an unsuccessful status code
engines = response.json()
return ListEnginesResponse(data=engines)
except requests.RequestException as e:
logger.error(f"Error fetching engine list: {e}")
raise HTTPException(status_code=500, detail="Error fetching engine list")

# Define the routes for the engines module
@router.get("/{engine_id}", response_model=EngineResponse)
async def retrieve_engine(engine_id: str):
''' '''

raise NotImplementedError()
return EngineResponse()
try:
# Implement logic to fetch a specific engine's details
# This is a placeholder, replace with your actual data retrieval logic
engine_details = {"id": engine_id, "name": "Engine Name", "description": "Engine Description"}
return EngineResponse(data=[engine_details])
except Exception as e:
logger.error(f"Error fetching engine details: {e}")
raise HTTPException(status_code=500, detail=f"Error fetching details for engine {engine_id}")