Skip to content

Commit

Permalink
Scaffold evals
Browse files Browse the repository at this point in the history
  • Loading branch information
tedspare committed Oct 9, 2024
1 parent f759669 commit a9e8ced
Show file tree
Hide file tree
Showing 7 changed files with 194 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
- [2024-10-09] [Scaffold evals](https://github.com/RubricLab/memory/commit/3801514a795881c74ea225d02eeae001a07ee57a)
- [2024-10-04] [bleed for mono](https://github.com/RubricLab/memory/commit/6db7d39072c60714068bcb00b07bbf917d76b4b8)
- [2024-10-03] [mod lint script](https://github.com/RubricLab/memory/commit/e5a4392d2e7852f0d1424f730aa1316bfeb66f3b)
- [2024-10-03] [gitignore turbo, format](https://github.com/RubricLab/memory/commit/fd8aedfe7144de98f9a405305e3b82839084fab7)
Expand Down
11 changes: 2 additions & 9 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,10 @@
inspired by mem0

- [ ] upsert memory
- [ ] agency to demo the thing
- [ ] dep on prisma
- [ ] agent to demo the thing
- [ ] expose methods to CRUD mems
- [ ] visualize memos in demo
- [x] visualize memos in demo

## Definition of Success

able to dump in facts and see blobs show up in relation to each other

## Examples

- "I went to Balthazar with G on the 10th of March 2023"
- "my cousin Suzy does not like cranberries"
- "I am vegan. ... (2 hours _later_) I am no longer vegan."
11 changes: 11 additions & 0 deletions env.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import { createEnv } from '@t3-oss/env-nextjs'
import z from 'zod'

/**
 * Validated environment for the package.
 *
 * `server` declares the schema each variable must satisfy and `runtimeEnv`
 * supplies the raw values read from `process.env`.
 */
export default createEnv({
	server: {
		// Must be a non-empty string.
		OPENAI_API_KEY: z.string().min(1)
	},
	runtimeEnv: {
		OPENAI_API_KEY: process.env.OPENAI_API_KEY
	}
})
60 changes: 60 additions & 0 deletions evals/extractions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/** A single subject–relation–object statement expected from a passage. */
interface Fact {
	/** Who or what the statement is about (the examples use `'user'` for the first person). */
	subject: string
	/** Phrase linking the subject to the object, e.g. `'went to'`. */
	relation: string
	/** The thing the subject is related to. */
	object: string
	/** Optional extra string key/value details attached to the fact (e.g. a date). */
	data?: Record<string, string>
}

/** A passage paired with the facts that should be extracted from it. */
interface Example {
	/** Raw passage text. */
	content: string
	/** Target facts for the passage. */
	facts: Fact[]
}

// A dated outing with a companion: two facts, each carrying the visit date.
const datedOuting: Example = {
	content: 'I went to Balthazar with George on the 10th of March 2024',
	facts: [
		{ subject: 'user', relation: 'went to', object: 'Balthazar', data: { date: '2024-03-10' } },
		{ subject: 'user', relation: 'was with', object: 'George', data: { date: '2024-03-10' } }
	]
}

// A third party's preference plus that person's relationship to the user.
const relativePreference: Example = {
	content: 'my cousin Suzy does not like cranberries',
	facts: [
		{ subject: 'Suzy', relation: 'does not like', object: 'cranberries' },
		{ subject: 'Suzy', relation: 'is', object: "user's cousin" }
	]
}

// A statement that is later reversed: only the latest state is expected.
const reversedStatement: Example = {
	content: 'I am vegan... (2 hours later)... I am no longer vegan.',
	facts: [{ subject: 'user', relation: 'is not', object: 'vegan' }]
}

/** Passages and their target facts, in the order they are evaluated. */
export const EXAMPLES: Example[] = [datedOuting, relativePreference, reversedStatement]
102 changes: 102 additions & 0 deletions evals/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import { parseArgs } from 'node:util'
import { openai } from '@ai-sdk/openai'
import { generateObject } from 'ai'
import chalk from 'chalk'
import { z } from 'zod'
import { clean, format } from '../utils/string.ts'
import { EXAMPLES } from './extractions.ts'

/**
 * Runs the fact-extraction eval over every entry in `EXAMPLES`.
 *
 * For each example the model is asked for a list of facts; every target fact
 * is then compared (exact string equality) against the returned facts, the
 * comparison is logged, and two scores are printed at the end:
 * - "Precision": matched entities (subject/relation/object) out of 3 per target fact
 * - "Recall": target facts matched on all three entities
 *
 * @param fast - when true uses `gpt-4o-mini`, otherwise `gpt-4o-2024-08-06`
 */
export const main = async ({ fast }: { fast?: boolean }) => {
	let totalFacts = 0
	let totalRecall = 0
	let totalEntities = 0

	// EXAMPLES is a plain array, so an ordinary for...of (with await inside) is enough.
	for (const eg of EXAMPLES) {
		let correctEntities = 0
		let correctFacts = 0

		totalFacts += eg.facts.length

		// The prompt's continuation lines are kept flush-left so the text sent
		// to the model does not depend on how `clean` treats indentation.
		const {
			object: { facts: attempts }
		} = await generateObject({
			model: openai(fast ? 'gpt-4o-mini' : 'gpt-4o-2024-08-06'),
			schema: z.object({
				facts: z.array(
					z.object({
						subject: z.string(),
						relation: z.string().describe('a verb phrase'),
						object: z.string(),
						data: z.record(z.string(), z.string()).optional().describe('to capture any additional info')
					})
				)
			}),
			messages: [
				{
					role: 'system',
					content: clean`Please extract all probable and implicit facts from the following passage.
Portray the first-person as "user".
Capture new relationships.
Try to capture the most up-to-date state of affairs in present tense.
Passage:
"${eg.content}"`
				},
				{
					role: 'user',
					content: eg.content
				}
			]
		})

		// Indices of attempts already claimed by a fully matched target, so one
		// attempt cannot satisfy two targets.
		const omitted: number[] = []

		for (const fact of eg.facts) {
			console.log(
				`\nTarget: ${chalk.magenta(fact.subject)} ${chalk.yellow(fact.relation)} ${chalk.blue(fact.object)}`
			)

			// Best partial score (0-3) any unclaimed attempt achieves for this target.
			let bestEntities = 0

			for (const [index, attempt] of attempts.entries()) {
				const { subject, relation, object } = attempt

				const subjectMatch = fact.subject === subject
				const relationMatch = fact.relation === relation
				const objectMatch = fact.object === object

				console.log(
					`${index + 1} of ${attempts.length}: ${chalk.magenta(format(subject, subjectMatch))} ${chalk.yellow(
						format(relation, relationMatch)
					)} ${chalk.blue(format(object, objectMatch))}`
				)

				if (omitted.includes(index)) continue

				const matchedEntities = Number(subjectMatch) + Number(relationMatch) + Number(objectMatch)
				bestEntities = Math.max(bestEntities, matchedEntities)

				if (matchedEntities === 3) {
					correctFacts += 1
					omitted.push(index)
					break
				}
			}

			// Accumulate per target; previously this value was overwritten on
			// every attempt, so only the last comparison of an example counted.
			correctEntities += bestEntities
		}

		totalRecall += correctFacts
		totalEntities += correctEntities
	}

	console.log(`\nPrecision: ${chalk.green(`${~~((totalEntities / (totalFacts * 3)) * 100)}%`)}`)
	console.log(`Recall: ${chalk.green(`${~~((totalRecall / totalFacts) * 100)}%`)}`)
}

// Parse CLI flags from Bun's argv. Positionals are allowed so the leading
// non-flag entries in argv do not make parseArgs throw.
const cliOptions = {
	fast: {
		type: 'boolean',
		default: false
	}
} as const

const args = parseArgs({
	args: Bun.argv,
	options: cliOptions,
	allowPositionals: true
})

// Only run the eval when this module is the entry script.
if (Bun.main === import.meta.path) {
	main({ fast: args.values.fast })
}
10 changes: 8 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@rubriclab/memory",
"module": "index.ts",
"version": "0.0.3",
"version": "0.0.4",
"private": false,
"type": "module",
"devDependencies": {
Expand All @@ -11,8 +11,13 @@
"typescript": "^5.0.0"
},
"dependencies": {
"@ai-sdk/openai": "^0.0.66",
"@rubriclab/config": "*",
"@rubriclab/package": "*",
"@rubriclab/config": "*"
"@t3-oss/env-nextjs": "^0.11.1",
"ai": "^3.4.9",
"chalk": "^5.3.0",
"zod": "^3.23.8"
},
"simple-git-hooks": {
"post-commit": "bun run rubriclab-postcommit"
Expand All @@ -25,6 +30,7 @@
"bleed": "bun x npm-check-updates -u",
"clean": "rm -rf .next && rm -rf node_modules",
"format": "bun x biome format --write .",
"eval": "bun evals/index.ts",
"lint": "bun x biome check . && bun x biome lint .",
"lint:fix": "bun x biome check --fix --unsafe . && bun x biome lint --write --unsafe ."
}
Expand Down
10 changes: 10 additions & 0 deletions utils/string.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import chalk from 'chalk'

/**
 * Template tag that removes source-code indentation from a multi-line
 * template literal while leaving the text itself intact.
 *
 * Only spaces/tabs at the start of a line are stripped; spaces between words
 * and whitespace directly after an interpolated value are preserved.
 *
 * @param strings - literal chunks of the template
 * @param values - interpolated values, inserted unchanged
 * @returns the joined, de-indented string
 */
export const clean = (strings: TemplateStringsArray, ...values: string[]): string => {
	return strings.reduce((result, chunk, i) => {
		// Indentation that follows a line break inside this chunk.
		const dedented = chunk.replace(/\n[ \t]+/g, '\n')
		// Only the first chunk begins at the start of a line; later chunks begin
		// right after an interpolated value, where leading whitespace is content.
		const text = i === 0 ? dedented.replace(/^[ \t]+/, '') : dedented
		// The final chunk has no matching value.
		return result + text + (values[i] ?? '')
	}, '')
}

/**
 * Highlights an entity for the eval log.
 *
 * @param entity - text to print
 * @param match - whether the entity matched its target
 * @returns the entity on a green background when matched, red otherwise
 */
export const format = (entity: string, match: boolean) => {
	if (match) return chalk.bgGreen(entity)
	return chalk.bgRed(entity)
}

0 comments on commit a9e8ced

Please sign in to comment.