
Commit 0091ec4

Merge pull request #58 from jamesturk/november-updates
2023-11-06 updates
jamesturk authored Nov 24, 2023
2 parents 5d2a504 + 893230f commit 0091ec4
Showing 7 changed files with 21 additions and 17 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -12,7 +12,7 @@ jobs:
     strategy:
       max-parallel: 4
       matrix:
-        python-version: ["3.10", "3.11"]
+        python-version: ["3.10", "3.11", "3.12"]

     steps:
       # Python & dependency installation
1 change: 1 addition & 0 deletions docs/changelog.md
@@ -3,6 +3,7 @@
 ## 0.6.0

 * bump `pydantic` to version 2.0 and loosen some other dependencies.
+* add support for November 2023 model upgrades

 ## 0.5.1 - 2023-06-13
7 changes: 3 additions & 4 deletions src/scrapeghost/models.py
@@ -18,9 +18,8 @@ def cost(self, prompt_tokens: int, completion_tokens: int) -> float:
 models = [
     Model("gpt-4", 0.03, 0.06, 8192),
     Model("gpt-4-32k", 0.06, 0.12, 32768),
-    Model("gpt-3.5-turbo", 0.0015, 0.002, 4096),
-    Model("gpt-3.5-turbo-16k", 0.003, 0.004, 16384),
-    Model("gpt-3.5-turbo-0613", 0.0015, 0.002, 4096),
-    Model("gpt-3.5-turbo-16k-0613", 0.003, 0.004, 16384),
+    Model("gpt-4-1106-preview", 0.01, 0.03, 128000),
+    Model("gpt-3.5-turbo", 0.001, 0.002, 16384),
+    Model("gpt-3.5-turbo-1106", 0.001, 0.002, 16384),
 ]
 _model_dict = {model.name: model for model in models}
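Note: each `Model` entry is (name, prompt price, completion price, context window), with prices in dollars per 1,000 tokens. A minimal sketch of that arithmetic, which the updated test expectations elsewhere in this commit rely on — `PricedModel` and its field names are illustrative stand-ins, not scrapeghost's actual `Model` class:

```python
from dataclasses import dataclass


@dataclass
class PricedModel:
    # Illustrative stand-in for scrapeghost's Model; field names are assumed.
    name: str
    prompt_price: float      # dollars per 1,000 prompt tokens
    completion_price: float  # dollars per 1,000 completion tokens
    max_tokens: int

    def cost(self, prompt_tokens: int, completion_tokens: int) -> float:
        return (
            prompt_tokens / 1000 * self.prompt_price
            + completion_tokens / 1000 * self.completion_price
        )


turbo = PricedModel("gpt-3.5-turbo", 0.001, 0.002, 16384)
preview = PricedModel("gpt-4-1106-preview", 0.01, 0.03, 128000)

# 1,000 prompt + 1,000 completion tokens:
assert round(turbo.cost(1000, 1000), 6) == 0.003    # was 0.0015 + 0.002 = 0.0035
assert round(preview.cost(1000, 1000), 6) == 0.04   # vs. gpt-4's 0.03 + 0.06 = 0.09
```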
8 changes: 6 additions & 2 deletions src/scrapeghost/scrapers.py
@@ -87,7 +87,7 @@ def __init__(

         if use_pydantic:
             # check if schema is a pydantic model
-            if not issubclass(schema, BaseModel):
+            if not isinstance(schema, type) or not issubclass(schema, BaseModel):
                 raise ValueError("Schema must be a Pydantic model.")
             self.postprocessors.append(PydanticPostprocessor(schema))

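The added `isinstance(schema, type)` check matters because `issubclass()` raises `TypeError` when its first argument is not a class (for example, a plain dict schema or a model instance), while the intent here is a clean `ValueError`. A standalone sketch of the guard — `validate_schema` and `Page` are illustrative names, not part of scrapeghost:

```python
from pydantic import BaseModel


class Page(BaseModel):
    title: str


def validate_schema(schema) -> None:
    # Mirrors the new guard: check isinstance(schema, type) first, because
    # issubclass() raises TypeError if its first argument is not a class.
    if not isinstance(schema, type) or not issubclass(schema, BaseModel):
        raise ValueError("Schema must be a Pydantic model.")


validate_schema(Page)  # fine: a BaseModel subclass
try:
    validate_schema({"title": "str"})  # a plain dict is rejected cleanly
except ValueError as e:
    print(e)
# issubclass({"title": "str"}, BaseModel) on its own would raise TypeError instead.
```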
@@ -242,7 +242,11 @@ def _pydantic_to_simple_schema(pydantic_model: Type[BaseModel]) -> dict:
     schema: dict = {}
     for field_name, field in pydantic_model.model_fields.items():
         # model_fields is present on Pydantic models, so can process recursively
-        if hasattr(field.annotation, "model_fields"):
+        if field.annotation is None:
+            raise TypeError("missing annotation")
+        elif isinstance(field.annotation, type) and issubclass(
+            field.annotation, BaseModel
+        ):
             schema[field_name] = _pydantic_to_simple_schema(field.annotation)
         else:
             type_name = field.annotation.__name__
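The second hunk applies the same pattern when flattening a Pydantic model into a simple schema: recurse only into annotations that are real `BaseModel` subclasses, and fail loudly on a missing annotation. A self-contained sketch of that recursion — `to_simple_schema`, `Author`, and `Article` are illustrative, and the real helper does more with the non-model type name than shown here:

```python
from pydantic import BaseModel


class Author(BaseModel):
    name: str


class Article(BaseModel):
    title: str
    author: Author


def to_simple_schema(model: type[BaseModel]) -> dict:
    # Nested BaseModel fields recurse; everything else is reduced to the
    # annotation's type name, roughly like the function patched above.
    schema: dict = {}
    for field_name, field in model.model_fields.items():
        annotation = field.annotation
        if annotation is None:
            raise TypeError("missing annotation")
        elif isinstance(annotation, type) and issubclass(annotation, BaseModel):
            schema[field_name] = to_simple_schema(annotation)
        else:
            schema[field_name] = annotation.__name__
    return schema


print(to_simple_schema(Article))  # {'title': 'str', 'author': {'name': 'str'}}
```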
12 changes: 6 additions & 6 deletions tests/test_apicall.py
@@ -15,7 +15,7 @@ def test_basic_call():
         api_call.request("<html>")
     assert create.call_count == 1
     assert create.call_args.kwargs["model"] == "gpt-3.5-turbo"
-    assert api_call.total_cost == 0.0000035
+    assert api_call.total_cost == 0.000003


 def test_model_fallback():
@@ -41,7 +41,7 @@ def _make_n_tokens(n):

 def test_model_fallback_token_limit():
     api_call = OpenAiCall(
-        models=["gpt-3.5-turbo", "gpt-4", "gpt-3.5-turbo-16k"],
+        models=["gpt-4", "gpt-3.5-turbo"],
         retry=RetryRule(1, 0),  # disable wait
     )
     with patch_create() as create:
@@ -52,12 +52,12 @@ def test_model_fallback_token_limit():

     # make sure we used the 16k model and only made one request
     assert create.call_count == 1
-    assert create.call_args.kwargs["model"] == "gpt-3.5-turbo-16k"
+    assert create.call_args.kwargs["model"] == "gpt-3.5-turbo"


 def test_model_fallback_token_limit_still_too_big():
     api_call = OpenAiCall(
-        models=["gpt-3.5-turbo-16k", "gpt-4"],
+        models=["gpt-4", "gpt-3.5-turbo"],
         retry=RetryRule(1, 0),  # disable wait
     )

@@ -109,7 +109,7 @@ def test_max_cost_exceeded():
         prompt_tokens=1000, completion_tokens=1000
     )
     with pytest.raises(MaxCostExceeded):
-        for _ in range(300):
+        for _ in range(350):
             api_call.request("<html>" * 1000)


@@ -123,7 +123,7 @@ def test_stats():
         api_call.request("<html>")

     assert api_call.stats() == {
-        "total_cost": pytest.approx(0.043),
+        "total_cost": pytest.approx(0.042),
         "total_prompt_tokens": 20000,
         "total_completion_tokens": 2000,
     }
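The expectation changes in this file follow from the new gpt-3.5-turbo prices. A back-of-the-envelope check — the per-request token counts and the roughly $1 cost ceiling are inferred from the visible assertions, not shown in this diff:

```python
# test_basic_call: about 1 prompt + 1 completion token per mocked request
old_call = 1 / 1000 * 0.0015 + 1 / 1000 * 0.002  # 0.0000035 (old expectation)
new_call = 1 / 1000 * 0.001 + 1 / 1000 * 0.002   # 0.000003  (new expectation)

# test_max_cost_exceeded: 1,000 prompt + 1,000 completion tokens per request
old_per_request = 0.0015 + 0.002  # $0.0035 -> 300 requests is about $1.05
new_per_request = 0.001 + 0.002   # $0.0030 -> 350 requests is about $1.05
# hence the loop now needs 350 iterations to push past the (apparent ~$1) cap
```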
2 changes: 1 addition & 1 deletion tests/test_pagination.py
@@ -91,7 +91,7 @@ def test_pagination():
     assert resp.data[-1]["url"] == "/yak"

     assert resp.api_responses == [resp1, resp2, resp3]
-    assert resp.total_cost == 0.0000105
+    assert resp.total_cost == 0.000009
     assert resp.total_prompt_tokens == 3
     assert resp.total_completion_tokens == 3
     assert resp.url == "/page2; /page3; https://example.com/page1"
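The pagination total works the same way, assuming the three mocked responses report one prompt and one completion token each (consistent with the token assertions above):

```python
total_cost = 3 / 1000 * 0.001 + 3 / 1000 * 0.002
# 0.000003 + 0.000006 = 0.000009 (previously 0.0000045 + 0.000006 = 0.0000105)
```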
6 changes: 3 additions & 3 deletions tests/test_utils.py
@@ -6,8 +6,8 @@
     "model,pt,ct,total",
     [
         ("gpt-4", 1000, 1000, 0.09),
-        ("gpt-3.5-turbo", 1000, 1000, 0.0035),
-        ("gpt-3.5-turbo", 2000, 2000, 0.007),  # near max
+        ("gpt-3.5-turbo", 1000, 1000, 0.003),
+        ("gpt-3.5-turbo", 2000, 2000, 0.006),  # near max
         ("gpt-4", 4000, 4000, 0.36),  # near max
     ],
 )
@@ -17,5 +17,5 @@ def test_cost_calc(model, pt, ct, total):


 def test_cost_estimate():
-    assert utils.cost_estimate("hello" * 1000, "gpt-3.5-turbo") == pytest.approx(0.0025)
+    assert utils.cost_estimate("hello" * 1000, "gpt-3.5-turbo") == pytest.approx(0.002)
     assert utils.cost_estimate("hello" * 1000, "gpt-4") == pytest.approx(0.06)
