Skip to content

Commit

Permalink
Update lm-eval-harness to 0.4.3 (#91)
Browse files Browse the repository at this point in the history
* update lm-eval-harness: 0.4.3

* fix random seeds

* linting

* add sql query with between

* add error catches
  • Loading branch information
AguirreNicolas authored Jul 18, 2024
1 parent 2c9d3dd commit bd31b33
Show file tree
Hide file tree
Showing 20 changed files with 698 additions and 185 deletions.
54 changes: 52 additions & 2 deletions apps/python/evaluator/activities/signatures/tokenizer_evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,19 @@

from packages.python.common.auto_heartbeater import auto_heartbeater
from packages.python.lmeh.utils.mongodb import MongoOperator
from packages.python.lmeh.utils.tokenizers import load_tokenizer, prepare_tokenizer
from packages.python.lmeh.utils.tokenizers import load_tokenizer, prepare_tokenizer, load_config, prepare_config
from packages.python.protocol.protocol import (
PocketNetworkEvaluationTaskRequest,
PocketNetworkMongoDBResultSignature,
PocketNetworkMongoDBTokenizer,
PocketNetworkMongoDBConfig,
SignatureSample,
PocketNetworkMongoDBResultBase,
)




@activity.defn
@auto_heartbeater
async def tokenizer_evaluate(args: PocketNetworkEvaluationTaskRequest) -> bool:
Expand Down Expand Up @@ -72,6 +75,9 @@ async def tokenizer_evaluate(args: PocketNetworkEvaluationTaskRequest) -> bool:
tokenizer_decoded = False
try:
tokenizer_jsons = json.loads(responses[0]["response"]["response"])
# extract config from tokenizer jsons
config_jsons = {"config": tokenizer_jsons.pop("config")}
eval_logger.debug("Config", config_jsons=config_jsons)
tokenizer_decoded = True
except Exception as e:
eval_logger.debug("Exeption:", Exeption=str(e))
Expand Down Expand Up @@ -103,6 +109,26 @@ async def tokenizer_evaluate(args: PocketNetworkEvaluationTaskRequest) -> bool:
tokenizer=tokenizer_jsons_loaded, hash=tokenizer_hash_loaded
)
eval_logger.debug("Tokenizer processed.")
######################
### CONFIG
#####################
_config = load_config(
config_objects = config_jsons,
wf_id="",
config_ephimeral_path=temp_path,
)
eval_logger.debug("Config loaded.")
# This creates the structure used in the database, containing the hash
config_jsons_loaded, config_hash_loaded = prepare_config(
_config, CONFIG_EPHIMERAL_PATH=temp_path
)
# TODO
# For now, the tokenizer hash is used as the config hash;
# in future versions, this should be changed
config_mongo_new = PocketNetworkMongoDBConfig(
config=config_jsons_loaded, hash=tokenizer_hash_loaded
)
eval_logger.debug("Config processed.")
tokenizer_ok = True
except Exception as e:
# This is not an error; it is just a failure to retrieve the tokenizer
Expand Down Expand Up @@ -143,13 +169,37 @@ async def tokenizer_evaluate(args: PocketNetworkEvaluationTaskRequest) -> bool:
signature=str(tokenizer_mongo_new.hash), id=0
) # This task has a single sample id
]
######################
### CONFIG
#####################
config_db = await mongo_operator.get_config_entry(
config_mongo_new.hash
)
if config_db is None:
eval_logger.debug("Config does not exists.")
# the config is not tracked, we need to create an entry
try:
async with mongo_client.start_transaction() as session:
await mongo_client.db["configs"].insert_many(
[config_mongo_new.model_dump(by_alias=True)],
ordered=False,
session=session,
)
eval_logger.debug("Saved new config to DB.")
except Exception as e:
eval_logger.error("Failed to save Config to MongoDB.")
eval_logger.error("Exeption:", Exeption=str(e))
raise ApplicationError(
"Failed to save config to MongoDB.", non_retryable=True
)

# Save to results db (a failure is also an answer)
try:
async with mongo_client.start_transaction() as session:
await mongo_client.db["results"].find_one_and_update(
{"result_data.task_id": args.task_id},
{"$set": result.model_dump(by_alias=True)},
upsert=True,
session=session,
)
await mongo_client.db["tasks"].update_one(
Expand All @@ -166,7 +216,7 @@ async def tokenizer_evaluate(args: PocketNetworkEvaluationTaskRequest) -> bool:
)

eval_logger.info(
"Status:",
"Tokenizer Status:",
tokenizer_decoded=tokenizer_decoded,
tokenizer_is_valid=tokenizer_ok,
tokenizer_is_new=tokenizer_new,
Expand Down
34 changes: 25 additions & 9 deletions apps/python/evaluator/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion apps/python/evaluator/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ package-mode = false

[tool.poetry.dependencies]
python = "^3.11"
lm-eval = {version = "^0.4.2", extras = ["dev","openai"]}
lm-eval = {version = "^0.4.2", extras = ["dev","openai","math"]}
temporalio = "^1.5.1"
structlog = "^24.1.0"
pydantic = "^2.7.0"
Expand Down
25 changes: 18 additions & 7 deletions apps/python/sampler/activities/lmeh/sample.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from activities.utils import auto_heartbeater
from app.app import get_app_config, get_app_logger
from temporalio import activity
from temporalio.exceptions import ApplicationError

from packages.python.lmeh.utils.common import get_task_manager
from app.app import get_app_logger, get_app_config
from packages.python.protocol.protocol import PocketNetworkTaskRequest
from packages.python.lmeh.utils import generator as lmeh_generator
from packages.python.lmeh.utils import task_config as open_llm_config
from packages.python.lmeh.pocket_lm_eval.models.pocket_network import PocketNetworkLM
from activities.utils import auto_heartbeater
from packages.python.lmeh.utils import sql as lmeh_sql
from packages.python.lmeh.pocket_lm_eval.tasks import TASK_MANAGER_SAMPLE_STAGE
from packages.python.lmeh.utils import generator as lmeh_generator
from packages.python.lmeh.utils import sql as lmeh_sql
from packages.python.lmeh.utils import task_config as open_llm_config
from packages.python.lmeh.utils.common import get_task_manager
from packages.python.protocol.protocol import PocketNetworkTaskRequest


@activity.defn
Expand Down Expand Up @@ -112,6 +112,17 @@ async def lmeh_sample(args: PocketNetworkTaskRequest) -> bool:

# Instantiate the LM
eval_logger.info("Generating LM")
eval_logger.debug(
"Passed `--trust_remote_code`, setting environment variable `HF_DATASETS_TRUST_REMOTE_CODE=true`"
)
# HACK: import datasets and override its HF_DATASETS_TRUST_REMOTE_CODE value internally,
# because it's already been determined based on the prior env var before launching our
# script--`datasets` gets imported by lm_eval internally before these lines can update the env.
import datasets

datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True

args.llm_args["trust_remote_code"] = True
lm = PocketNetworkLM(
requester_args=args.requester_args,
mongo_client=mongo_client,
Expand Down
32 changes: 24 additions & 8 deletions apps/python/sampler/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion apps/python/sampler/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ package-mode = false

[tool.poetry.dependencies]
python = "^3.11"
lm-eval = {version = "^0.4.2", extras = ["dev","openai"]}
lm-eval = {version = "^0.4.2", extras = ["dev","openai","math"]}
temporalio = "^1.5.1"
structlog = "^24.1.0"
pydantic = "^2.7.0"
Expand Down
15 changes: 9 additions & 6 deletions apps/python/sampler/worker/main.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import sys
import asyncio

# import concurrent.futures
import multiprocessing
import sys
from concurrent.futures import ProcessPoolExecutor

from temporalio.client import Client
from temporalio.worker import Worker, SharedStateManager
from temporalio.worker import SharedStateManager, Worker
from temporalio.worker.workflow_sandbox import (
SandboxedWorkflowRunner,
SandboxRestrictions,
Expand All @@ -14,14 +15,15 @@
sys.path.append(".")
sys.path.append("../../../")

from packages.python.common.utils import get_from_dict
from app.app import setup_app, get_app_logger
from app.config import read_config
from activities.lmeh.register_task import register_task as lmeh_register_task
from activities.lmeh.sample import lmeh_sample as lmeh_sample
from activities.signatures.signatures import sign_sample
from app.app import get_app_logger, setup_app
from app.config import read_config
from workflows.register import Register
from workflows.sampler import Sampler
from activities.signatures.signatures import sign_sample

from packages.python.common.utils import get_from_dict

# We always want to pass through external modules to the sandbox that we know
# are safe for workflow use
Expand All @@ -41,6 +43,7 @@
"lm_eval",
"pydantic",
"datasets",
"transformers",
]


Expand Down
Loading

0 comments on commit bd31b33

Please sign in to comment.