-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 7 changed files with 194 additions and 11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import { createEnv } from '@t3-oss/env-nextjs' | ||
import z from 'zod' | ||
|
||
export default createEnv({ | ||
runtimeEnv: { | ||
OPENAI_API_KEY: process.env.OPENAI_API_KEY | ||
}, | ||
server: { | ||
OPENAI_API_KEY: z.string().min(1) | ||
} | ||
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
type Fact = { | ||
subject: string | ||
relation: string | ||
object: string | ||
data?: Record<string, string> | ||
} | ||
|
||
type Example = { | ||
content: string | ||
facts: Fact[] | ||
} | ||
|
||
export const EXAMPLES: Example[] = [ | ||
{ | ||
content: 'I went to Balthazar with George on the 10th of March 2024', | ||
facts: [ | ||
{ | ||
subject: 'user', | ||
relation: 'went to', | ||
object: 'Balthazar', | ||
data: { | ||
date: '2024-03-10' | ||
} | ||
}, | ||
{ | ||
subject: 'user', | ||
relation: 'was with', | ||
object: 'George', | ||
data: { | ||
date: '2024-03-10' | ||
} | ||
} | ||
] | ||
}, | ||
{ | ||
content: 'my cousin Suzy does not like cranberries', | ||
facts: [ | ||
{ | ||
subject: 'Suzy', | ||
relation: 'does not like', | ||
object: 'cranberries' | ||
}, | ||
{ | ||
subject: 'Suzy', | ||
relation: 'is', | ||
object: "user's cousin" | ||
} | ||
] | ||
}, | ||
{ | ||
content: 'I am vegan... (2 hours later)... I am no longer vegan.', | ||
facts: [ | ||
{ | ||
subject: 'user', | ||
relation: 'is not', | ||
object: 'vegan' | ||
} | ||
] | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
import { parseArgs } from 'node:util' | ||
import { openai } from '@ai-sdk/openai' | ||
import { generateObject } from 'ai' | ||
import chalk from 'chalk' | ||
import { z } from 'zod' | ||
import { clean, format } from '../utils/string.ts' | ||
import { EXAMPLES } from './extractions.ts' | ||
|
||
export const main = async ({ fast }: { fast?: boolean }) => { | ||
let totalExamples = 0 | ||
let totalRecall = 0 | ||
let totalEntities = 0 | ||
|
||
for await (const eg of EXAMPLES) { | ||
let correctEntities = 0 | ||
let correctFacts = 0 | ||
|
||
totalExamples += eg.facts.length | ||
|
||
const { | ||
object: { facts: attempts } | ||
} = await generateObject({ | ||
model: openai(fast ? 'gpt-4o-mini' : 'gpt-4o-2024-08-06'), | ||
schema: z.object({ | ||
facts: z.array( | ||
z.object({ | ||
subject: z.string(), | ||
relation: z.string().describe('a verb phrase'), | ||
object: z.string(), | ||
data: z.record(z.string(), z.string()).optional().describe('to capture any additional info') | ||
}) | ||
) | ||
}), | ||
messages: [ | ||
{ | ||
role: 'system', | ||
content: clean`Please extract all probable and implicit facts from the following passage. | ||
Portray the first-person as "user". | ||
Capture new relationships. | ||
Try to capture the most up-to-date state of affairs in present tense. | ||
Passage: | ||
"${eg.content}"` | ||
}, | ||
{ | ||
role: 'user', | ||
content: eg.content | ||
} | ||
] | ||
}) | ||
|
||
const omitted: number[] = [] | ||
|
||
for (const fact of eg.facts) { | ||
console.log( | ||
`\nTarget: ${chalk.magenta(fact.subject)} ${chalk.yellow(fact.relation)} ${chalk.blue(fact.object)}` | ||
) | ||
|
||
for (const [index, attempt] of attempts.entries()) { | ||
const { subject, relation, object } = attempt | ||
|
||
const subjectMatch = fact.subject === subject | ||
const relationMatch = fact.relation === relation | ||
const objectMatch = fact.object === object | ||
|
||
console.log( | ||
`${index + 1} of ${attempts.length}: ${chalk.magenta(format(subject, subjectMatch))} ${chalk.yellow( | ||
format(relation, relationMatch) | ||
)} ${chalk.blue(format(object, objectMatch))}` | ||
) | ||
|
||
if (omitted.includes(index)) continue | ||
|
||
correctEntities = Number(subjectMatch) + Number(relationMatch) + Number(objectMatch) | ||
correctFacts += Number(subjectMatch && relationMatch && objectMatch) | ||
|
||
if (correctEntities === 3) { | ||
omitted.push(index) | ||
break | ||
} | ||
} | ||
} | ||
|
||
totalRecall += correctFacts | ||
totalEntities += correctEntities | ||
} | ||
|
||
console.log(`\nPrecision: ${chalk.green(`${~~((totalEntities / (totalExamples * 3)) * 100)}%`)}`) | ||
console.log(`Recall: ${chalk.green(`${~~((totalRecall / totalExamples) * 100)}%`)}`) | ||
} | ||
|
||
const args = parseArgs({ | ||
args: Bun.argv, | ||
options: { | ||
fast: { | ||
type: 'boolean', | ||
default: false | ||
} | ||
}, | ||
allowPositionals: true | ||
}) | ||
|
||
if (import.meta.path === Bun.main) main({ fast: args.values.fast }) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
import chalk from 'chalk' | ||
|
||
export const clean = (strings: TemplateStringsArray, ...values: string[]): string => { | ||
return strings.reduce((result, string, i) => { | ||
return result + string.replaceAll(' ', '') + (values[i] || '') | ||
}, '') | ||
} | ||
|
||
export const format = (entity: string, match: boolean) => | ||
match ? chalk.bgGreen(entity) : chalk.bgRed(entity) |