Skip to content

Commit

Permalink
Fix Neural Solution SQL/CMD injection (#1627)
Browse files Browse the repository at this point in the history
Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
Kaihui-intel and pre-commit-ci[bot] authored Feb 27, 2024
1 parent 8ddd755 commit 14b7b0a
Show file tree
Hide file tree
Showing 12 changed files with 187 additions and 13 deletions.
2 changes: 1 addition & 1 deletion neural_solution/backend/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def prepare_task(self, task: Task):
if not task.optimized:
# Generate quantization code with Neural Coder API
neural_coder_cmd = ["python -m neural_coder --enable --approach"]
# for users to define approach: "static, ""static_ipex", "dynamic", "auto"
# for users to define approach: "static", "static_ipex", "dynamic", "auto"
approach = task.approach
neural_coder_cmd.append(approach)
if is_remote_url(task.script_url):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ optional arguments:
"script_url": "tf_example1",
"optimized": "True",
"arguments": [
"--dataset_location=dataset --model_path=model"
"--dataset_location=dataset", "--model_path=model"
],
"approach": "static",
"requirements": [
Expand All @@ -106,7 +106,7 @@ When using distributed quantization, the `workers` needs to be set to greater th
"script_url": "tf_example1",
"optimized": "True",
"arguments": [
"--dataset_location=dataset --model_path=model"
"--dataset_location=dataset", "--model_path=model"
],
"approach": "static",
"requirements": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"script_url": "custom_models_optimized/tf_example1",
"optimized": "True",
"arguments": [
"--dataset_location=dataset --model_path=model"
"--dataset_location=dataset", "--model_path=model"
],
"approach": "static",
"requirements": ["tensorflow"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"script_url": "custom_models_optimized/tf_example1",
"optimized": "True",
"arguments": [
"--dataset_location=dataset --model_path=model"
"--dataset_location=dataset", "--model_path=model"
],
"approach": "static",
"requirements": ["tensorflow"
Expand Down
2 changes: 1 addition & 1 deletion neural_solution/examples/hf_models/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ optional arguments:
"script_url": "https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
"optimized": "False",
"arguments": [
"--model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result"
"--model_name_or_path=bert-base-cased", "--task_name=mrpc", "--do_eval", "--output_dir=result"
],
"approach": "static",
"requirements": [],
Expand Down
2 changes: 1 addition & 1 deletion neural_solution/examples/hf_models/task_request.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"script_url": "https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
"optimized": "False",
"arguments": [
"--model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result"
"--model_name_or_path=bert-base-cased", "--task_name=mrpc", "--do_eval", "--output_dir=result"
],
"approach": "static",
"requirements": ["datasets", "transformers=4.21.0", "torch"],
Expand Down
2 changes: 1 addition & 1 deletion neural_solution/examples/hf_models_grpc/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ optional arguments:
"script_url": "https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
"optimized": "False",
"arguments": [
"--model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result"
"--model_name_or_path=bert-base-cased", "--task_name=mrpc", "--do_eval", "--output_dir=result"
],
"approach": "static",
"requirements": [],
Expand Down
2 changes: 1 addition & 1 deletion neural_solution/examples/hf_models_grpc/task_request.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"script_url": "https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
"optimized": "False",
"arguments": [
"--model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result"
"--model_name_or_path=bert-base-cased", "--task_name=mrpc", "--do_eval", "--output_dir=result"
],
"approach": "static",
"requirements": ["datasets", "transformers=4.21.0", "torch"],
Expand Down
5 changes: 5 additions & 0 deletions neural_solution/frontend/fastapi/main_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
get_cluster_info,
get_cluster_table,
get_res_during_tuning,
is_valid_task,
list_to_string,
serialize,
)
Expand Down Expand Up @@ -153,10 +154,14 @@ async def submit_task(task: Task):
Returns:
json: status , id of task and messages.
"""
if not is_valid_task(task.dict()):
raise HTTPException(status_code=422, detail="Invalid task")

msg = "Task submitted successfully"
status = "successfully"
# search the current
db_path = get_db_path(config.workspace)

if os.path.isfile(db_path):
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
Expand Down
58 changes: 58 additions & 0 deletions neural_solution/frontend/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,3 +295,61 @@ def list_to_string(lst: list):
str: string
"""
return " ".join(str(i) for i in lst)


def is_invalid_str(to_test_str: str):
"""Verify whether the to_test_str is valid.
Args:
to_test_str (str): string to be tested.
Returns:
bool: valid or invalid
"""
return any(char in to_test_str for char in [" ", '"', "'", "&", "|", ";", "`", ">"])


def is_valid_task(task: dict) -> bool:
"""Verify whether the task is valid.
Args:
task (dict): task request
Returns:
bool: valid or invalid
"""
required_fields = ["script_url", "optimized", "arguments", "approach", "requirements", "workers"]

for field in required_fields:
if field not in task:
return False

if not isinstance(task["script_url"], str) or is_invalid_str(task["script_url"]):
return False

if (isinstance(task["optimized"], str) and task["optimized"] not in ["True", "False"]) or (
not isinstance(task["optimized"], str) and not isinstance(task["optimized"], bool)
):
return False

if not isinstance(task["arguments"], list):
return False
else:
for argument in task["arguments"]:
if is_invalid_str(argument):
return False

if not isinstance(task["approach"], str) or task["approach"] not in ["static", "static_ipex", "dynamic", "auto"]:
return False

if not isinstance(task["requirements"], list):
return False
else:
for requirement in task["requirements"]:
if is_invalid_str(requirement):
return False

if not isinstance(task["workers"], int) or task["workers"] < 1:
return False

return True
22 changes: 18 additions & 4 deletions neural_solution/test/frontend/fastapi/test_main_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,26 @@ def test_get_description(self):
def test_submit_task(self, mock_submit_task):
task = {
"script_url": "http://example.com/script.py",
"optimized": True,
"optimized": "True",
"arguments": ["arg1", "arg2"],
"approach": "approach1",
"approach": "static",
"requirements": ["req1", "req2"],
"workers": 3,
}

# test invalid task
task_invalid = {
"script_url": "http://example.com/script.py",
"optimized": "True",
"arguments": "invalid str, should be list",
"approach": "static",
"requirements": ["req1", "req2"],
"workers": 3,
}
response = client.post("/task/submit/", json=task_invalid)
print(response)
self.assertEqual(response.status_code, 422)
self.assertIn("arguments", response.text)

# test no db case
delete_db()
Expand Down Expand Up @@ -174,7 +188,7 @@ def test_get_task_by_id(self, mock_submit_task):
"script_url": "http://example.com/script.py",
"optimized": True,
"arguments": ["arg1", "arg2"],
"approach": "approach1",
"approach": "static",
"requirements": ["req1", "req2"],
"workers": 3,
}
Expand All @@ -200,7 +214,7 @@ def test_get_task_status_by_id(self, mock_submit_task):
"script_url": "http://example.com/script.py",
"optimized": True,
"arguments": ["arg1", "arg2"],
"approach": "approach1",
"approach": "static",
"requirements": ["req1", "req2"],
"workers": 3,
}
Expand Down
97 changes: 97 additions & 0 deletions neural_solution/test/frontend/fastapi/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
get_cluster_info,
get_cluster_table,
get_res_during_tuning,
is_valid_task,
list_to_string,
serialize,
)
Expand Down Expand Up @@ -110,6 +111,102 @@ def test_list_to_string(self):
expected_result = "Hello Neural Solution"
self.assertEqual(list_to_string(lst), expected_result)

def test_is_valid_task(self):
task_sql_injection = {
"script_url": "https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py",
"optimized": "False",
"arguments": [],
"approach": "5', '6', 7, 'pending'), ('1b9ff5c2fd2143d58522bd71d18845a3', '2', 3, '4', '5', '6', 7, 'pending') ON CONFLICT (id) DO UPDATE SET id = '1b9ff5c2fd2143d58522bd71d18845a3', q_model_path = '/home/victim/.ssh' --",
"requirements": [],
"workers": 1,
}
self.assertFalse(is_valid_task(task_sql_injection))

task_cmd_injection = {
"script_url": 'https://github.com/huggingface/transformers/blob/v4.21-release/examples/pytorch/text-classification/run_glue.py & eval "$(echo ZWNobyAiRG9tYWluIGV4cGFuc2lvbiIgPiB+L2F0dGFjay5weSI= | base64 --decode)"',
"optimized": "False",
"arguments": ["--model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result"],
"approach": "static",
"requirements": [],
"workers": 1,
}
self.assertFalse(is_valid_task(task_cmd_injection))

task_lack_field = {
"optimized": "True",
}
self.assertFalse(is_valid_task(task_lack_field))

task_script_url_not_str = {
"script_url": ["custom_models_optimized/tf_example1"],
"optimized": "True",
"arguments": ["--dataset_location=dataset --model_path=model"],
"approach": "static",
"requirements": ["tensorflow"],
"workers": 1,
}
self.assertFalse(is_valid_task(task_script_url_not_str))

task_optimized_not_bool_str = {
"script_url": ["custom_models_optimized/tf_example1"],
"optimized": "True or False",
"arguments": ["--dataset_location=dataset", "--model_path=model"],
"approach": "static",
"requirements": ["tensorflow"],
"workers": 1,
}
self.assertFalse(is_valid_task(task_optimized_not_bool_str))

task_arguments_not_list = {
"script_url": ["custom_models_optimized/tf_example1"],
"optimized": "True",
"arguments": 123,
"approach": "static",
"requirements": ["tensorflow"],
"workers": 1,
}
self.assertFalse(is_valid_task(task_arguments_not_list))

task_arguments_invalid = {
"script_url": ["custom_models_optimized/tf_example1"],
"optimized": "True",
"arguments": ["--dataset_location=dataset --model_path=model"],
"approach": "static",
"requirements": ["tensorflow"],
"workers": 1,
}
self.assertFalse(is_valid_task(task_arguments_not_list))

task_approach_is_invalid = {
"script_url": ["custom_models_optimized/tf_example1"],
"optimized": "True",
"arguments": [],
"approach": "static or dynamic",
"requirements": ["tensorflow"],
"workers": 1,
}
self.assertFalse(is_valid_task(task_approach_is_invalid))

task_requirements_not_list = {
"script_url": ["custom_models_optimized/tf_example1"],
"optimized": "True",
"arguments": [],
"approach": "static",
"requirements": "tensorflow",
"workers": 1,
}
self.assertFalse(is_valid_task(task_requirements_not_list))

task_normal = {
"script_url": "custom_models_optimized/tf_example1",
"optimized": "True",
"arguments": ["--dataset_location=dataset", "--model_path=model"],
"approach": "static",
"requirements": ["tensorflow"],
"workers": 1,
}
self.assertTrue(is_valid_task(task_normal))


if __name__ == "__main__":
unittest.main()

0 comments on commit 14b7b0a

Please sign in to comment.