Skip to content

Commit

Permalink
Generalize eval arch
Browse files Browse the repository at this point in the history
  • Loading branch information
tedspare committed Oct 9, 2024
1 parent e2b4b91 commit 1e3a72a
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 91 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
- [2024-10-09] [Generalize eval arch](https://github.com/RubricLab/memory/commit/bf80487850e840525a1521925a439d7d9fc8d638)
- [2024-10-09] [Add help cmd](https://github.com/RubricLab/memory/commit/9f35d0016dcd5d0f909cb77c2ea33ef70da60fb1)
- [2024-10-09] [Scaffold evals](https://github.com/RubricLab/memory/commit/3801514a795881c74ea225d02eeae001a07ee57a)
- [2024-10-04] [bleed for mono](https://github.com/RubricLab/memory/commit/6db7d39072c60714068bcb00b07bbf917d76b4b8)
Expand Down
93 changes: 4 additions & 89 deletions evals/index.ts
Original file line number Diff line number Diff line change
@@ -1,92 +1,6 @@
import { parseArgs } from 'node:util'
import { openai } from '@ai-sdk/openai'
import { generateObject } from 'ai'
import chalk from 'chalk'
import { z } from 'zod'
import { clean, format } from '../utils/string.ts'
import { EXAMPLES } from './extractions.ts'

/**
 * Runs the one-shot fact-extraction eval: for every entry in `EXAMPLES`, asks the
 * model to extract subject/relation/object facts from `eg.content`, compares them
 * against the expected `eg.facts` by strict string equality, logs each comparison,
 * and finally prints a "Precision" and a "Recall" percentage.
 *
 * @param fast - when truthy, uses 'gpt-4o-mini' instead of 'gpt-4o-2024-08-06'.
 */
export const main = async ({ fast }: { fast?: boolean }) => {
// Despite the name, this counts expected facts (eg.facts.length), not examples.
let totalExamples = 0
let totalRecall = 0
let totalEntities = 0

for await (const eg of EXAMPLES) {
let correctEntities = 0
let correctFacts = 0

totalExamples += eg.facts.length

// Structured extraction: the schema constrains the model output to a list of facts.
const {
object: { facts: attempts }
} = await generateObject({
model: openai(fast ? 'gpt-4o-mini' : 'gpt-4o-2024-08-06'),
schema: z.object({
facts: z.array(
z.object({
subject: z.string(),
relation: z.string().describe('a verb phrase'),
object: z.string(),
data: z.record(z.string(), z.string()).optional().describe('to capture any additional info')
})
)
}),
// NOTE(review): the passage is sent twice, once embedded in the system prompt
// and once more as the user message.
messages: [
{
role: 'system',
content: clean`Please extract all probable and implicit facts from the following passage.
Portray the first-person as "user".
Capture new relationships.
Try to capture the most up-to-date state of affairs in present tense.
Passage:
"${eg.content}"`
},
{
role: 'user',
content: eg.content
}
]
})

// Indices of attempts that already fully matched an earlier expected fact.
const omitted: number[] = []

for (const fact of eg.facts) {
console.log(
`\nTarget: ${chalk.magenta(fact.subject)} ${chalk.yellow(fact.relation)} ${chalk.blue(fact.object)}`
)

for (const [index, attempt] of attempts.entries()) {
const { subject, relation, object } = attempt

// Exact string equality; any difference in wording or casing counts as a miss.
const subjectMatch = fact.subject === subject
const relationMatch = fact.relation === relation
const objectMatch = fact.object === object

// Logged before the `omitted` check, so already-consumed attempts are still printed.
// `format` is defined in utils/string.ts; presumably it marks matches vs. misses (TODO confirm).
console.log(
`${index + 1} of ${attempts.length}: ${chalk.magenta(format(subject, subjectMatch))} ${chalk.yellow(
format(relation, relationMatch)
)} ${chalk.blue(format(object, objectMatch))}`
)

if (omitted.includes(index)) continue

// NOTE(review): this is an assignment, not an accumulation. Each evaluated attempt
// overwrites the previous value, so only the last non-omitted attempt compared for
// the last fact contributes to `totalEntities` below. Confirm the intended metric.
correctEntities = Number(subjectMatch) + Number(relationMatch) + Number(objectMatch)
correctFacts += Number(subjectMatch && relationMatch && objectMatch)

// A full match consumes this attempt and ends the search for the current fact.
if (correctEntities === 3) {
omitted.push(index)
break
}
}
}

totalRecall += correctFacts
totalEntities += correctEntities
}

// `~~` truncates the percentage toward zero (and turns NaN from 0/0 into 0).
console.log(`\nPrecision: ${chalk.green(`${~~((totalEntities / (totalExamples * 3)) * 100)}%`)}`)
console.log(`Recall: ${chalk.green(`${~~((totalRecall / totalExamples) * 100)}%`)}`)
}
import { runOneShotExamples } from './one-shot'
import { runMultiTurnExamples } from './multi-turn'

const args = parseArgs({
args: Bun.argv,
Expand Down Expand Up @@ -115,5 +29,6 @@ if (import.meta.path === Bun.main) {
process.exit(0)
}

main({ fast: args.values.fast })
await runOneShotExamples({ fast: args.values.fast })
await runMultiTurnExamples({ fast: args.values.fast })
}
1 change: 1 addition & 0 deletions evals/multi-turn/examples.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/**
 * Multi-turn eval examples.
 *
 * Currently empty. The element type is declared explicitly so that consumers do
 * not depend on what the compiler infers for a bare `[]` literal; narrow it to a
 * concrete example type once the multi-turn example shape is defined.
 */
export const EXAMPLES: unknown[] = []
11 changes: 11 additions & 0 deletions evals/multi-turn/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import { EXAMPLES } from './examples'

/**
 * Runs the multi-turn eval examples.
 *
 * Scaffold only: every entry of `EXAMPLES` is written to the console as-is.
 * The `fast` option is accepted for parity with the one-shot runner but is not
 * read by this implementation.
 */
export const runMultiTurnExamples = async ({ fast }: { fast?: boolean }): Promise<void> => {
  for await (const example of EXAMPLES) console.log(example)
}
2 changes: 1 addition & 1 deletion evals/extractions.ts → evals/one-shot/examples.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export const EXAMPLES: Example[] = [
]
},
{
content: 'I am vegan... (2 hours later)... I am no longer vegan.',
content: 'I am vegan... (2 hours later)... I am not vegan.',
facts: [
{
subject: 'user',
Expand Down
96 changes: 96 additions & 0 deletions evals/one-shot/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import { openai } from '@ai-sdk/openai'
import { generateObject } from 'ai'
import chalk from 'chalk'
import { z } from 'zod'
import { clean, format } from '../../utils/string.ts'
import { EXAMPLES } from './examples.ts'

/**
 * Runs the one-shot fact-extraction eval.
 *
 * For every entry in `EXAMPLES` the model is asked to extract
 * subject/relation/object facts from `eg.content`. Each expected fact is then
 * compared against the returned attempts using strict string equality, and an
 * attempt that fully matches is consumed so it cannot satisfy a second fact.
 * Finally, precision (matches / attempts) and recall (matches / expected facts)
 * are printed as truncated percentages.
 *
 * @param fast - when truthy, uses 'gpt-4o-mini' instead of 'gpt-4o-2024-08-06'.
 */
export const runOneShotExamples = async ({ fast }: { fast?: boolean }): Promise<void> => {
  let totalFacts = 0
  let totalRecall = 0
  let totalAttempts = 0

  for await (const eg of EXAMPLES) {
    let correctFacts = 0

    totalFacts += eg.facts.length

    console.log(chalk.yellow(`\n\n"${eg.content}"`))

    const {
      object: { facts: attempts }
    } = await generateObject({
      model: openai(fast ? 'gpt-4o-mini' : 'gpt-4o-2024-08-06'),
      schema: z.object({
        facts: z.array(
          z.object({
            subject: z.string(),
            relation: z.string().describe('a verb phrase'),
            object: z.string(),
            data: z.record(z.string(), z.string()).optional().describe('to capture any additional info')
          })
        )
      }),
      prompt: clean`Please extract all probable and implicit facts from the following passage.
Portray the first-person as "user".
Capture new relationships.
Try to capture the most up-to-date state of affairs in present tense.
Passage:
"${eg.content}"
`
      // messages: [
      // {
      // role: 'system',
      // content: clean`Please extract all probable and implicit facts from the following passage.
      // Portray the first-person as "user".
      // Capture new relationships.
      // Try to capture the most up-to-date state of affairs in present tense.`
      // },
      // {
      // role: 'user',
      // content: eg.content
      // }
      // ]
    })

    // Indices of attempts that already fully matched an earlier expected fact.
    const omitted = new Set<number>()

    for (const [i, fact] of eg.facts.entries()) {
      console.log(
        `\n🎯 ${i + 1} of ${eg.facts.length}: ${chalk.magenta(fact.subject)} ${chalk.yellow(fact.relation)} ${chalk.blue(fact.object)}`
      )

      for (const [j, attempt] of attempts.entries()) {
        // An attempt consumed by a previous fact is neither logged nor re-scored.
        if (omitted.has(j)) continue

        const { subject, relation, object } = attempt

        const correctSubject = fact.subject === subject
        const correctRelation = fact.relation === relation
        const correctObject = fact.object === object

        console.log(
          `🤖 ${j + 1} of ${attempts.length}: ${chalk.magenta(format(subject, correctSubject))} ${chalk.yellow(
            format(relation, correctRelation)
          )} ${chalk.blue(format(object, correctObject))}`
        )

        // Decide on THIS comparison only. Testing the running `correctFacts`
        // counter instead would, after the first hit, consume the first
        // remaining attempt for every later fact whether or not it matched.
        const isFullMatch = correctSubject && correctRelation && correctObject

        if (isFullMatch) {
          correctFacts += 1
          omitted.add(j)
          break
        }
      }
    }

    totalRecall += correctFacts
    totalAttempts += attempts.length
  }

  // `~~` truncates toward zero and maps NaN (0 / 0 when nothing ran) to 0.
  console.log(
    `\n\nPrecision (% of attempts true): ${totalRecall} of ${totalAttempts} ${chalk.green(`(${~~((totalRecall / totalAttempts) * 100)}%)`)}`
  )
  console.log(
    `Recall (% of total facts correctly returned): ${totalRecall} of ${totalFacts} ${chalk.green(`(${~~((totalRecall / totalFacts) * 100)}%)`)}`
  )
}
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@rubriclab/memory",
"module": "index.ts",
"version": "0.0.5",
"version": "0.0.6",
"private": false,
"type": "module",
"devDependencies": {
Expand Down

0 comments on commit 1e3a72a

Please sign in to comment.