
Commit 0091ec4

Merge pull request #58 from jamesturk/november-updates
2023-11-06 updates
jamesturk authored Nov 24, 2023
2 parents 5d2a504 + 893230f commit 0091ec4
Showing 7 changed files with 21 additions and 17 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -12,7 +12,7 @@ jobs:
     strategy:
       max-parallel: 4
       matrix:
-        python-version: ["3.10", "3.11"]
+        python-version: ["3.10", "3.11", "3.12"]

     steps:
       # Python & dependency installation
1 change: 1 addition & 0 deletions docs/changelog.md
@@ -3,6 +3,7 @@
 ## 0.6.0

 * bump `pydantic` to version 2.0 and loosen some other dependencies.
+* add support for November 2023 model upgrades

 ## 0.5.1 - 2023-06-13
7 changes: 3 additions & 4 deletions src/scrapeghost/models.py
@@ -18,9 +18,8 @@ def cost(self, prompt_tokens: int, completion_tokens: int) -> float:
 models = [
     Model("gpt-4", 0.03, 0.06, 8192),
     Model("gpt-4-32k", 0.06, 0.12, 32768),
-    Model("gpt-3.5-turbo", 0.0015, 0.002, 4096),
-    Model("gpt-3.5-turbo-16k", 0.003, 0.004, 16384),
-    Model("gpt-3.5-turbo-0613", 0.0015, 0.002, 4096),
-    Model("gpt-3.5-turbo-16k-0613", 0.003, 0.004, 16384),
+    Model("gpt-4-1106-preview", 0.01, 0.03, 128000),
+    Model("gpt-3.5-turbo", 0.001, 0.002, 16384),
+    Model("gpt-3.5-turbo-1106", 0.001, 0.002, 16384),
 ]
 _model_dict = {model.name: model for model in models}
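Note: each `Model` entry is (name, prompt price, completion price, context window), with prices in dollars per 1,000 tokens. A minimal sketch of that arithmetic, which the updated test expectations elsewhere in this commit rely on — `PricedModel` and its field names are illustrative stand-ins, not scrapeghost's actual `Model` class:

```python
from dataclasses import dataclass


@dataclass
class PricedModel:
    # Illustrative stand-in for scrapeghost's Model; field names are assumed.
    name: str
    prompt_price: float      # dollars per 1,000 prompt tokens
    completion_price: float  # dollars per 1,000 completion tokens
    max_tokens: int

    def cost(self, prompt_tokens: int, completion_tokens: int) -> float:
        return (
            prompt_tokens / 1000 * self.prompt_price
            + completion_tokens / 1000 * self.completion_price
        )


turbo = PricedModel("gpt-3.5-turbo", 0.001, 0.002, 16384)
preview = PricedModel("gpt-4-1106-preview", 0.01, 0.03, 128000)

# 1,000 prompt + 1,000 completion tokens:
assert round(turbo.cost(1000, 1000), 6) == 0.003    # was 0.0015 + 0.002 = 0.0035
assert round(preview.cost(1000, 1000), 6) == 0.04   # vs. gpt-4's 0.03 + 0.06 = 0.09
```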
8 changes: 6 additions & 2 deletions src/scrapeghost/scrapers.py
@@ -87,7 +87,7 @@ def __init__(

         if use_pydantic:
             # check if schema is a pydantic model
-            if not issubclass(schema, BaseModel):
+            if not isinstance(schema, type) or not issubclass(schema, BaseModel):
                 raise ValueError("Schema must be a Pydantic model.")
             self.postprocessors.append(PydanticPostprocessor(schema))

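The added `isinstance(schema, type)` check matters because `issubclass()` raises `TypeError` when its first argument is not a class (for example, a plain dict schema or a model instance), while the intent here is a clean `ValueError`. A standalone sketch of the guard — `validate_schema` and `Page` are illustrative names, not part of scrapeghost:

```python
from pydantic import BaseModel


class Page(BaseModel):
    title: str


def validate_schema(schema) -> None:
    # Mirrors the new guard: check isinstance(schema, type) first, because
    # issubclass() raises TypeError if its first argument is not a class.
    if not isinstance(schema, type) or not issubclass(schema, BaseModel):
        raise ValueError("Schema must be a Pydantic model.")


validate_schema(Page)  # fine: a BaseModel subclass
try:
    validate_schema({"title": "str"})  # a plain dict is rejected cleanly
except ValueError as e:
    print(e)
# issubclass({"title": "str"}, BaseModel) on its own would raise TypeError instead.
```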
@@ -242,7 +242,11 @@ def _pydantic_to_simple_schema(pydantic_model: Type[BaseModel]) -> dict:
     schema: dict = {}
     for field_name, field in pydantic_model.model_fields.items():
         # model_fields is present on Pydantic models, so can process recursively
-        if hasattr(field.annotation, "model_fields"):
+        if field.annotation is None:
+            raise TypeError("missing annotation")
+        elif isinstance(field.annotation, type) and issubclass(
+            field.annotation, BaseModel
+        ):
             schema[field_name] = _pydantic_to_simple_schema(field.annotation)
         else:
             type_name = field.annotation.__name__
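The second hunk applies the same pattern when flattening a Pydantic model into a simple schema: recurse only into annotations that are real `BaseModel` subclasses, and fail loudly on a missing annotation. A self-contained sketch of that recursion — `to_simple_schema`, `Author`, and `Article` are illustrative, and the real helper does more with the non-model type name than shown here:

```python
from pydantic import BaseModel


class Author(BaseModel):
    name: str


class Article(BaseModel):
    title: str
    author: Author


def to_simple_schema(model: type[BaseModel]) -> dict:
    # Nested BaseModel fields recurse; everything else is reduced to the
    # annotation's type name, roughly like the function patched above.
    schema: dict = {}
    for field_name, field in model.model_fields.items():
        annotation = field.annotation
        if annotation is None:
            raise TypeError("missing annotation")
        elif isinstance(annotation, type) and issubclass(annotation, BaseModel):
            schema[field_name] = to_simple_schema(annotation)
        else:
            schema[field_name] = annotation.__name__
    return schema


print(to_simple_schema(Article))  # {'title': 'str', 'author': {'name': 'str'}}
```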
12 changes: 6 additions & 6 deletions tests/test_apicall.py
@@ -15,7 +15,7 @@ def test_basic_call():
         api_call.request("<html>")
     assert create.call_count == 1
     assert create.call_args.kwargs["model"] == "gpt-3.5-turbo"
-    assert api_call.total_cost == 0.0000035
+    assert api_call.total_cost == 0.000003


 def test_model_fallback():
@@ -41,7 +41,7 @@ def _make_n_tokens(n):

 def test_model_fallback_token_limit():
     api_call = OpenAiCall(
-        models=["gpt-3.5-turbo", "gpt-4", "gpt-3.5-turbo-16k"],
+        models=["gpt-4", "gpt-3.5-turbo"],
         retry=RetryRule(1, 0),  # disable wait
     )
     with patch_create() as create:
@@ -52,12 +52,12 @@ def test_model_fallback_token_limit():

     # make sure we used the 16k model and only made one request
     assert create.call_count == 1
-    assert create.call_args.kwargs["model"] == "gpt-3.5-turbo-16k"
+    assert create.call_args.kwargs["model"] == "gpt-3.5-turbo"


 def test_model_fallback_token_limit_still_too_big():
     api_call = OpenAiCall(
-        models=["gpt-3.5-turbo-16k", "gpt-4"],
+        models=["gpt-4", "gpt-3.5-turbo"],
         retry=RetryRule(1, 0),  # disable wait
     )

@@ -109,7 +109,7 @@ def test_max_cost_exceeded():
         prompt_tokens=1000, completion_tokens=1000
     )
     with pytest.raises(MaxCostExceeded):
-        for _ in range(300):
+        for _ in range(350):
             api_call.request("<html>" * 1000)


@@ -123,7 +123,7 @@ def test_stats():
         api_call.request("<html>")

     assert api_call.stats() == {
-        "total_cost": pytest.approx(0.043),
+        "total_cost": pytest.approx(0.042),
         "total_prompt_tokens": 20000,
         "total_completion_tokens": 2000,
     }
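The expectation changes in this file follow from the new gpt-3.5-turbo prices. A back-of-the-envelope check — the per-request token counts and the roughly $1 cost ceiling are inferred from the visible assertions, not shown in this diff:

```python
# test_basic_call: about 1 prompt + 1 completion token per mocked request
old_call = 1 / 1000 * 0.0015 + 1 / 1000 * 0.002  # 0.0000035 (old expectation)
new_call = 1 / 1000 * 0.001 + 1 / 1000 * 0.002   # 0.000003  (new expectation)

# test_max_cost_exceeded: 1,000 prompt + 1,000 completion tokens per request
old_per_request = 0.0015 + 0.002  # $0.0035 -> 300 requests is about $1.05
new_per_request = 0.001 + 0.002   # $0.0030 -> 350 requests is about $1.05
# hence the loop now needs 350 iterations to push past the (apparent ~$1) cap
```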
2 changes: 1 addition & 1 deletion tests/test_pagination.py
@@ -91,7 +91,7 @@ def test_pagination():
     assert resp.data[-1]["url"] == "/yak"

     assert resp.api_responses == [resp1, resp2, resp3]
-    assert resp.total_cost == 0.0000105
+    assert resp.total_cost == 0.000009
     assert resp.total_prompt_tokens == 3
     assert resp.total_completion_tokens == 3
     assert resp.url == "/page2; /page3; https://example.com/page1"
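The pagination total works the same way, assuming the three mocked responses report one prompt and one completion token each (consistent with the token assertions above):

```python
total_cost = 3 / 1000 * 0.001 + 3 / 1000 * 0.002
# 0.000003 + 0.000006 = 0.000009 (previously 0.0000045 + 0.000006 = 0.0000105)
```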
6 changes: 3 additions & 3 deletions tests/test_utils.py
@@ -6,8 +6,8 @@
     "model,pt,ct,total",
     [
         ("gpt-4", 1000, 1000, 0.09),
-        ("gpt-3.5-turbo", 1000, 1000, 0.0035),
-        ("gpt-3.5-turbo", 2000, 2000, 0.007),  # near max
+        ("gpt-3.5-turbo", 1000, 1000, 0.003),
+        ("gpt-3.5-turbo", 2000, 2000, 0.006),  # near max
         ("gpt-4", 4000, 4000, 0.36),  # near max
     ],
 )
@@ -17,5 +17,5 @@ def test_cost_calc(model, pt, ct, total):


 def test_cost_estimate():
-    assert utils.cost_estimate("hello" * 1000, "gpt-3.5-turbo") == pytest.approx(0.0025)
+    assert utils.cost_estimate("hello" * 1000, "gpt-3.5-turbo") == pytest.approx(0.002)
     assert utils.cost_estimate("hello" * 1000, "gpt-4") == pytest.approx(0.06)
