Merge pull request #6 from Gordon-BP/ai-mvp

Ai mvp LGTM!
Gordon-BP · Aug 10, 2023 · 36b3ac6 · 36b3ac6
2 parents 7d66211 + 7d946cb
commit 36b3ac6
Show file tree

Hide file tree

Showing 13 changed files with 688 additions and 199 deletions.
diff --git a/combined.log b/combined.log
diff --git a/error.log b/error.log
@@ -45,3 +45,30 @@
 {"level":"error","message":"Error while scanning directory tree","service":"file-service"}
 {"level":"\u001b[31merror\u001b[39m","message":"Code Gen Error TypeError: Converting circular structure to JSON\n    --> starting at object with constructor 'ClientRequest'\n    |     property 'socket' -> object with constructor 'Socket'\n    --- property '_httpMessage' closes the circle","service":"cg-router","timestamp":"2023-08-08T18:03:20.533Z"}
 {"level":"\u001b[31merror\u001b[39m","message":"Code Gen Error TypeError: Converting circular structure to JSON\n    --> starting at object with constructor 'ClientRequest'\n    |     property 'socket' -> object with constructor 'Socket'\n    --- property '_httpMessage' closes the circle","service":"cg-router","timestamp":"2023-08-08T18:12:20.967Z"}
+{"level":"error","message":"Error while reading file:Error: ENOENT: no such file or directory, open 'repos/Gordon-BP/taylor-test-repo/Taylor_Issue_69/index.html'","service":"file-service"}
+{"level":"\u001b[31merror\u001b[39m","message":"Linting issues found:[object Object]","service":"vr-router","timestamp":"2023-08-10T08:25:56.573Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Linting issues found:[object Object]","service":"vr-router","timestamp":"2023-08-10T08:26:21.737Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Linting issues found:[object Object]","service":"vr-router","timestamp":"2023-08-10T08:27:04.661Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Linting issues found:[object Object]","service":"vr-router","timestamp":"2023-08-10T08:28:43.964Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Linting issues found:[object Object]","service":"vr-router","timestamp":"2023-08-10T08:30:28.203Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Linting issues found:[object Object]","service":"vr-router","timestamp":"2023-08-10T08:31:03.626Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Linting issues found:[object Object]","service":"vr-router","timestamp":"2023-08-10T08:31:48.719Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Linting issues found:[object Object]","service":"vr-router","timestamp":"2023-08-10T08:34:47.912Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Linting issues found:[object Object]","service":"vr-router","timestamp":"2023-08-10T08:35:12.027Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Linting issues found:[object Object]","service":"vr-router","timestamp":"2023-08-10T08:35:51.349Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Linting issues found:[object Object]","service":"vr-router","timestamp":"2023-08-10T08:44:56.958Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Linting issues found:[object Object]","service":"vr-router","timestamp":"2023-08-10T09:01:12.243Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Linting issues found:[object Object]","service":"vr-router","timestamp":"2023-08-10T09:01:27.957Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Linting issues found:[object Object]","service":"vr-router","timestamp":"2023-08-10T09:12:08.099Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Error executing code: SyntaxError: Unexpected token 'this'","service":"vr-router","timestamp":"2023-08-10T09:32:23.163Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Error executing code: ReferenceError: __context__ is not defined","service":"vr-router","timestamp":"2023-08-10T09:33:04.945Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Error executing code: ReferenceError: __context__ is not defined","service":"vr-router","timestamp":"2023-08-10T09:33:17.158Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Error executing code: ReferenceError: __context__ is not defined","service":"vr-router","timestamp":"2023-08-10T09:34:06.653Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Error executing code: ReferenceError: __context__ is not defined","service":"vr-router","timestamp":"2023-08-10T09:34:35.016Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Error executing code: SyntaxError: Unexpected token 'this'","service":"vr-router","timestamp":"2023-08-10T09:35:16.657Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Error executing code: SyntaxError: await is only valid in async functions and the top level bodies of modules","service":"vr-router","timestamp":"2023-08-10T09:39:13.941Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Error executing code: TypeError: eval(...).call is not a function","service":"vr-router","timestamp":"2023-08-10T09:41:23.530Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Error making PR: AxiosError: Request failed with status code 400}","service":"vr-router","timestamp":"2023-08-10T13:45:14.914Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Error making PR: AxiosError: Request failed with status code 400}","service":"vr-router","timestamp":"2023-08-10T13:46:29.823Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Error making PR: AxiosError: Request failed with status code 400}","service":"vr-router","timestamp":"2023-08-10T13:49:04.757Z"}
+{"level":"\u001b[31merror\u001b[39m","message":"Error making PR: TypeError: Converting circular structure to JSON\n    --> starting at object with constructor 'ClientRequest'\n    |     property 'socket' -> object with constructor 'Socket'\n    --- property '_httpMessage' closes the circle}","service":"vr-router","timestamp":"2023-08-10T13:51:41.901Z"}
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -62,6 +62,7 @@
   },
   "dependencies": {
     "@types/axios": "^0.14.0",
+    "@types/eslint": "^8.44.2",
     "axios": "^1.4.0",
     "body-parser": "^1.20.2",
     "dree": "^4.5.5",

diff --git a/readme.md b/readme.md
@@ -1,84 +1,53 @@
 # Taylor the Junior Dev!
+## AKA Code-Voyager
+![image](taylor_jd_banner.png)
+
+An implementation of MineDojo's [Voyager](https://voyager.minedojo.org/) focused on closing Github issues instead of playing Minecraft.
+
+## What makes this agent so special?
+Taylor ~~steals~~ builds upon Voyager's architecture to complete long-horizon tasks better than other LLM-powered agents. This includes:
+* Dynamic task decomposition based on both the initial Github issue and past successes/failures
+* Custom code-generation agent that explores your Github repo in order to write more applicable code
+* Self-validation both by traditional methods (compiler checks, linting, etc.) and with an LLM
+
+This agent also interfaces _entirely through Github!_ As a Github app, Taylor automatically:
+* Starts work on Issues as soon as they are submitted
+* Clones the repo, forks a new branch, and commits to this new branch only
+* Creates a pull request when they believe the issue is resolved.
+
+## Tech Stack:
+* Node.js + Typescript
+* Express.js for the API
+* Langchain.js to interface with the LLM stuff
+* OpenAI APIs for the LLM
+
+# This is version 0.1 - it barely works 😅
+Some things it can do:
+- [x] Connect to a repo as a Github app
+- [x] Fetch an issue, decompose it into a clear task, and assign that task to the code generation agent
+- [x] Commit and open a PR when the code generation is finished
+
+## Things to come in version 0.2
+- [ ] Self-verification agent that doesn't rubber-stamp approve everything
+- [ ] Return lint errors back to the code generator without failing the task
+- [ ] Set up logging on a per-task basis to add environmental data to each chain
+- [ ] Vector database based off of the repo code + library documentation
+- [ ] Skills library and retriever (like Voyager)
+- [ ] Move model configs from hardcoded to .env vars
+- [ ] Meet with a skilled backend JS dev to talk about better ways to structure the app to reduce duplicate code
+
+## Things saved for version 0.3 and onwards
+- [ ] Dockerized app
+- [ ] Unit tests for each route
+- [ ] Internet search retriever (maybe)
+- [ ] Experiment with other models for each task
+    - I'm _super_ interested in trying out code-specific LLMs like StarCoder, and that 2.7B Ghostwriter.
+- [ ] Documentation about how to get your own Taylor server up and running
+- [ ] Extreme dogfooding: add Taylor to this repo and have the agent improve its own code
 
-An AI-powered software dev that does some pretty cool things!
-App ID 364693
-
-## To-do list
-
-**Self Validation**
-> I'm also on team agent for this part, too. It should be able to read any and all files, as well as the logs, to determine if the task is passed or not.
-- [ ] Verification bash script
-    - [ ] Lint check fixes whatever it can automatically
-    - [ ] Return any errors back to the LLM
-- [ ] Run the code ⚠️ oooh scary
-    - [ ] Return any errors back to the LLM
-- [X] Read file tool
-- [ ] Get the last 10 or so logs (omit unnecessary info like taskID, service)
-- [ ] Return pass/fail
-
-## Done ✅ Tasks
-**Github Utilities:** --Almost there!
-- [x] Clone repo
-- [x] Create new branch
-- [x] Stage, Commit, and Push combo
-- [x] Create Pull Request
-- [x] Get Issue
-- [x] Listen for issue creation
-- [x] Listen for pull request updates
-- [x] Comment on issue
-- [x] auth as app <- BIG WIN
-
-✅ DONE WITH GITHUB ✅
-
-
-**Primitive Skills:**
-- [x] Read file
-- [x] Write or Create File
-- [x] Delete File
-- [x] Get Directory Tree
-- [x] Tests for file skills
-
-✅ DONE WITH PRIMITIVES ✅
-
-**Other Utilities**
-- [x] Stream task-specific logs to a task-specific file
-- [ ] Better webhook hosting than ngrok
-- [ ] Clean up your server-side logging
-
-**Task Generation**
-- [~] Fetch info
-    - [x] Dir tree
-    - [x] Past tasks passed
-    - [x] Past tasks failed
-    - [ ] Reference from docs (pushed to v0.0.2)
-- [x] Generate Questions
-- [x] Answer questions
-- [x] Use all info to make task
-- [x] Return task as single string
-
-✅ DONE WITH TASK GENERATION ✅ (for now)
-
-**Code Generation**
-> Now I'm back on team agent for this part. While simple stuff (like changing a README file) can be done in one shot, bug-fixing and small tweaks require a loop of Read File > Analyze > Make changes. We can either pass the contents of all the files into the LLM context, or we can let it read whatever files it wants.
-- [x] Fetch info
-    - [x] Dir tree
-    - [~] Relevant docs (pushed to v0.0.2)
-- [ ] Skills library
-    - [x] Primitive skills
-    - [ ] LLM's own custom skills
-- [x] Save code to a file
-✅ DONE WITH CODE GENERATION ✅ (for now)
-
-> Do we _have_ to require the LLM to write Javascript? Like, the code it writes will only be consuming APIs. It'll write additional code in strings, but there's no set reason why it has to use JS/TS to call those APIs, right?
-
-
-- [X] **Client Service**
-> Very much need a proper client service to consume these APIs with. I can more clearly understand the nodes & flows, and having all the steps as API services is really helpful. However, one client is needed per issue for the following reasons:
-    1. Client-side logs will be automatically scoped to the issue/task ID
-    2. Client can listen for webhooks, too, when tasks are pending PR review, comment clarification, or GH automatic checks
-    3. Server-Client structure is better suited for having 100+ clients running at once.
 
 ## Other Notes 'N Stuff
+> Do we _have_ to require the LLM to write Javascript? Like, the code it writes will only be consuming APIs. It'll write additional code in strings, but there's no set reason why it has to use JS/TS to call those APIs, right?
 * Web research retriever
     - will need to implement on your own with https://serpapi.com/integrations/node and LLM chain
 * HNSWlib indices https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/hnswlib

diff --git a/src/App.ts b/src/App.ts
@@ -5,6 +5,7 @@ import { tg_router } from "./routes/task_route.js";
 import bodyParser from "body-parser";
 import { fs_router } from "./routes/file_routes.js";
 import { cg_router } from "./routes/code_gen_route.js";
+import { vr_router } from "./routes/verification_route.js";
 import GithubUtils from "./utils/github_utils.js";
 import { v4 as uuid } from "uuid";
 import winston from "winston";
@@ -101,6 +102,7 @@ class App {
     this.express.use("/app/v1/files", fs_router);
     this.express.use("/app/v1/task", tg_router);
     this.express.use("/app/v1/code", cg_router);
+    this.express.use("/app/v1/verify", vr_router)
     this.express.use("/docs", express.static("./docs"));
   }
 }

diff --git a/src/prompts/verification_output_parser.ts b/src/prompts/verification_output_parser.ts
@@ -1,7 +1,7 @@
 import { AgentActionOutputParser } from "langchain/agents";
 import { AgentAction, AgentFinish } from "langchain/schema";
 
-export default class CodeOutputParser extends AgentActionOutputParser {
+export default class VerifyOutputParser extends AgentActionOutputParser {
   lc_namespace = ["langchain", "agents", "custom_llm_agent_chat"];
 
   async parse(text: string): Promise<AgentAction | AgentFinish> {

diff --git a/src/prompts/verification_prompt_template.ts b/src/prompts/verification_prompt_template.ts
@@ -29,7 +29,7 @@ async function load_prompts(): Promise<PromptArray> {
   return { prefix: PREFIX, instructions: formatInstructions, suffix: SUFFIX };
 }
 
-export default class CodePromptTemplate extends BaseChatPromptTemplate {
+export default class VerifyPromptTemplate extends BaseChatPromptTemplate {
   tools: DynamicStructuredTool[];
 
   constructor(args: {

diff --git a/src/routes/code_gen_route.ts b/src/routes/code_gen_route.ts
@@ -14,14 +14,11 @@ import winston from "winston";
 import axios, { AxiosRequestConfig } from "axios";
 import { AgentExecutor, LLMSingleActionAgent } from "langchain/agents";
 import { LLMChain } from "langchain";
+import {readFiles, writeFiles, submit} from "../utils/fileTools.js"
 //TODO: Change to be configurable from env
 export const cg_router = express.Router();
 const endpoint = "http://127.0.0.1:3000/app/v1";
 
-interface FileData {
-  path: string;
-  data: string;
-}
 cg_router.use(bodyParser.json());
 const logger = winston.createLogger({
   level: "debug",
@@ -52,75 +49,6 @@ if (process.env.NODE_ENV !== "production") {
     }),
   );
 }
-/**
- * Retrieves the contents of multiple files specified by their file paths.
- * 
- * @param paths - An array of file paths.
- * @param taskId - The task ID for identification purposes.
- * @returns A promise that resolves with a string containing the concatenated contents of all the files.
- */
-function getFiles(paths: string[], taskId: string): Promise<string> {
-  let fileContent = "";
-  const promises = paths.map((filePath) => {
-    if (/\.\/repos/.test(filePath)) {
-      filePath = filePath.split("./repos")[1];
-    }
-    return axios.get(`${endpoint}/files/${filePath}`, {
-      data: {
-        taskId: taskId,
-      },
-    });
-  });
-  return new Promise((resolve, reject) => {
-    Promise.all(promises)
-      .then((responses) => {
-        const fileContents = responses.map((response) => response.data);
-        fileContent = fileContents.join("\n");
-        resolve(fileContent);
-      })
-      .catch((err) => {
-        logger.error(`Error fetching files: ${err}`);
-        reject(err);
-      });
-  });
-}
-function writeFiles(fileData: FileData[], taskId: string): string {
-  let resp = "";
-  const promises = fileData.map((file) => {
-    if (/\.\/repos/.test(file.path)) {
-      file.path = file.path.split("./repos")[1];
-    }
-    const owner = file.path.split("/")[0];
-    const repo = file.path.split("/")[1];
-    const branchName = file.path.split("/")[2];
-    const filePath = file.path.split("/").slice(3).join("/");
-    return axios.post(`${endpoint}/files/${filePath}`, {
-      params: {
-        owner: owner,
-        repo: repo,
-        branchName: branchName,
-      },
-      data: {
-        filePath: filePath,
-        data: file.data,
-        taskId: taskId,
-      },
-    });
-  });
-  Promise.all(promises)
-    .then((responses) => {
-      resp = `Files successfully written:\n${responses}`;
-    })
-    .catch((err) => {
-      logger.error(`Error writing files: ${err}`);
-      resp = `Error writing files: ${err}`;
-    });
-  return resp;
-}
-function submit(data: string, taskId: string): string {
-  const resp = writeFiles([{ path: "/tmp/tmp.js", data: data }], taskId);
-  return resp;
-}
 
 /**
  * ============================================
@@ -163,17 +91,17 @@ cg_router.post(
             .describe("Your function that successfully resolves the task"),
           taskId: z.string().describe("Which task this call is for"),
         }),
-        func: async ({ data, taskId }) => submit(data, taskId),
+        func: async ({ data, taskId }) => submit(data, task),
         returnDirect: false,
       }),
       new DynamicStructuredTool({
-        name: "Get Files",
-        description: "Gets the data from each file in the provided list",
+        name: "Reads Files",
+        description: "Returns the data from each file in the provided list",
         schema: z.object({
           paths: z.array(z.string()),
           taskId: z.string(),
         }),
-        func: async ({ paths, taskId }) => getFiles(paths, taskId),
+        func: async ({ paths, taskId }) => readFiles(paths, taskId),
         returnDirect: false,
       }),
       new DynamicStructuredTool({
@@ -190,7 +118,7 @@ cg_router.post(
           ),
           taskId: z.string().describe("Which task this call is for"),
         }),
-        func: async ({ files, taskId }) => writeFiles(files, taskId),
+        func: async ({ files, taskId }) => writeFiles(files, task),
         returnDirect: false,
       }),
     ];