From bbaa8ae1d78ee5f459e9faff4214da1b84d65675 Mon Sep 17 00:00:00 2001 From: Wambere Date: Tue, 21 May 2024 18:22:59 +0300 Subject: [PATCH] Bulk import JSON FHIR resources (#187) * Initial commit for json fhir resources bulk import * Update sync strategy with option to sort based on resource type * Handle long resource lists * Update split_index to work with resources not separated by a new line * Add progressbar to show progress as chunks are processed * Make number of resources configurable * Add documentation for json array resource import * Clean up * Add tests * sample json * Formatting --- importer/README.md | 11 + importer/main.py | 166 +++++++++- importer/test_main.py | 560 +++++++++++++++++++++----------- importer/tests/json/sample.json | 1 + 4 files changed, 551 insertions(+), 187 deletions(-) create mode 100644 importer/tests/json/sample.json diff --git a/importer/README.md b/importer/README.md index e58b7f1f..54cb6eab 100644 --- a/importer/README.md +++ b/importer/README.md @@ -147,3 +147,14 @@ The coverage report `coverage.html` will be at the working directory - See example csv [here](/importer/csv/import/inventory.csv) - This creates a Group resource for each inventory imported - The first two columns __name__ and __active__ is the minimum required + +### 12. Import JSON resources from file +- Run `python3 main.py --bulk_import True --json_file tests/fhir_sample.json --chunk_size 500000 --sync sort --resources_count 100 --log_level info` +- This takes in a file with a JSON array, reads the resources from the array in the file and posts them to the FHIR server +- `bulk_import` (Required) must be set to True +- `json_file` (Required) points to the file with the json array. The resources in the array need to be separated by a single comma (no spaces) and the **"id"** must always be the first attribute in the resource object. 
This is what the code uses to identify the beginning and end of resources +- `chunk_size` (Not required) is the number of characters to read from the JSON file at a time. The size of this file can potentially be very large, so instead of reading it all at once, we read it in chunks. This number **MUST** be at least the size of the largest single resource in the array. The default is set to 1,000,000 +- `sync` (Not required) defines the sync strategy. This can be either **direct** (which is the default) or **sort** + - **Direct** will read the resources one chunk at a time, while building a payload and posting to the server before reading the next chunk. This works if you have referential integrity turned off in the FHIR server + - **Sort** will read all the resources in the file first and sort them into different resource types. It will then build separate payloads for the different resource types and try to post them to the FHIR server in the order that the resources first appear in the JSON file. For example, if you want Patients to be synced first, then make sure that the first resource is a Patient resource +- `resources_count` (Not required) is the number of resources put in a bundle when posting the resources to the FHIR server. 
The default is set to 100 diff --git a/importer/main.py b/importer/main.py index fe013daa..dc2f8640 100644 --- a/importer/main.py +++ b/importer/main.py @@ -82,7 +82,7 @@ def get_access_token(): # This function makes the request to the provided url # to create resources @backoff.on_exception(backoff.expo, requests.exceptions.RequestException, max_time=180) -def post_request(request_type, payload, url): +def post_request(request_type, payload, url, json_payload): logging.info("Posting request") logging.info("Request type: " + request_type) logging.info("Url: " + url) @@ -92,9 +92,9 @@ def post_request(request_type, payload, url): headers = {"Content-type": "application/json", "Authorization": access_token} if request_type == "POST": - return requests.post(url, data=payload, headers=headers) + return requests.post(url, data=payload, json=json_payload, headers=headers) elif request_type == "PUT": - return requests.put(url, data=payload, headers=headers) + return requests.put(url, data=payload, json=json_payload, headers=headers) elif request_type == "GET": return requests.get(url, headers=headers) elif request_type == "DELETE": @@ -103,9 +103,9 @@ def post_request(request_type, payload, url): logging.error("Unsupported request type!") -def handle_request(request_type, payload, url): +def handle_request(request_type, payload, url, json_payload=None): try: - response = post_request(request_type, payload, url) + response = post_request(request_type, payload, url, json_payload) if response.status_code == 200 or response.status_code == 201: logging.info("[" + str(response.status_code) + "]" + ": SUCCESS!") @@ -1494,6 +1494,136 @@ def save_image(image_source_url): return 0 +def process_chunk(resources_array: list, resource_type: str): + new_arr = [] + with click.progressbar( + resources_array, label="Progress::Processing chunks ... 
" + ) as resources_array_progress: + for resource in resources_array_progress: + if not resource_type: + resource_type = resource["resourceType"] + try: + resource_id = resource["id"] + except KeyError: + if "identifier" in resource: + resource_identifier = resource["identifier"][0]["value"] + resource_id = str( + uuid.uuid5(uuid.NAMESPACE_DNS, resource_identifier) + ) + else: + resource_id = str(uuid.uuid4()) + + item = {"resource": resource, "request": {}} + item["request"]["method"] = "PUT" + item["request"]["url"] = "/".join([resource_type, resource_id]) + new_arr.append(item) + + json_payload = {"resourceType": "Bundle", "type": "transaction", "entry": new_arr} + + r = handle_request("POST", "", config.fhir_base_url, json_payload) + logging.info(r.text) + # TODO handle failures + + +def set_resource_list( + objs: str = None, + json_list: list = None, + resource_type: str = None, + number_of_resources: int = 100, +): + if objs: + resources_array = json.loads(objs) + process_chunk(resources_array, resource_type) + if json_list: + if len(json_list) > number_of_resources: + for i in range(0, len(json_list), number_of_resources): + sub_list = json_list[i : i + number_of_resources] + process_chunk(sub_list, resource_type) + else: + process_chunk(json_list, resource_type) + + +def build_mapped_payloads(resource_mapping, json_file, resources_count): + with open(json_file, "r") as file: + data_dict = json.load(file) + with click.progressbar( + resource_mapping, label="Progress::Setting up ... 
" + ) as resource_mapping_progress: + for resource_type in resource_mapping_progress: + index_positions = resource_mapping[resource_type] + resource_list = [data_dict[i] for i in index_positions] + set_resource_list(None, resource_list, resource_type, resources_count) + + +def build_resource_type_map(resources: str, mapping: dict, index_tracker: int = 0): + resource_list = json.loads(resources) + for index, resource in enumerate(resource_list): + resource_type = resource["resourceType"] + if resource_type in mapping.keys(): + mapping[resource_type].append(index + index_tracker) + else: + mapping[resource_type] = [index + index_tracker] + + global import_counter + import_counter = len(resource_list) + import_counter + + +def split_chunk( + chunk: str, + left_over_chunk: str, + size: int, + mapping: dict = None, + sync: str = None, + import_counter: int = 0, +): + if len(chunk) + len(left_over_chunk) < int(size): + # load can fit in one chunk, so remove closing bracket + last_bracket = chunk.rfind("}") + current_chunk = chunk[: int(last_bracket)] + next_left_over_chunk = "-" + if len(chunk.strip()) == 0: + last_bracket = left_over_chunk.rfind("}") + left_over_chunk = left_over_chunk[: int(last_bracket)] + else: + # load can't fit, so split on last full resource + split_index = chunk.rfind( + '},{"id"' + ) # Assumption that this string will find the last full resource + current_chunk = chunk[:split_index] + next_left_over_chunk = chunk[int(split_index) + 2 :] + if len(chunk.strip()) == 0: + last_bracket = left_over_chunk.rfind("}") + left_over_chunk = left_over_chunk[: int(last_bracket)] + + if len(left_over_chunk.strip()) == 0: + current_chunk = current_chunk[1:] + + chunk_list = "[" + left_over_chunk + current_chunk + "}]" + + if sync.lower() == "direct": + set_resource_list(chunk_list) + if sync.lower() == "sort": + build_resource_type_map(chunk_list, mapping, import_counter) + return next_left_over_chunk + + +def read_file_in_chunks(json_file: str, chunk_size: 
int, sync: str): + logging.info("Reading file in chunks ...") + incomplete_load = "" + mapping = {} + global import_counter + import_counter = 0 + with open(json_file, "r") as file: + while True: + chunk = file.read(chunk_size) + if not chunk: + break + incomplete_load = split_chunk( + chunk, incomplete_load, chunk_size, mapping, sync, import_counter + ) + return mapping + + class ResponseFilter(logging.Filter): def __init__(self, param=None): self.param = param @@ -1523,6 +1653,7 @@ def filter(self, record): @click.command() @click.option("--csv_file", required=False) +@click.option("--json_file", required=False) @click.option("--access_token", required=False) @click.option("--resource_type", required=False) @click.option("--assign", required=False) @@ -1538,8 +1669,18 @@ def filter(self, record): @click.option("--parameter", required=False, default="_lastUpdated") @click.option("--value", required=False, default="gt2023-01-01") @click.option("--limit", required=False, default=1000) +@click.option("--bulk_import", required=False, default=False) +@click.option("--chunk_size", required=False, default=1000000) +@click.option("--resources_count", required=False, default=100) +@click.option( + "--sync", + type=click.Choice(["DIRECT", "SORT"], case_sensitive=False), + required=False, + default="DIRECT", +) def main( csv_file, + json_file, access_token, resource_type, assign, @@ -1553,6 +1694,10 @@ def main( parameter, value, limit, + bulk_import, + chunk_size, + resources_count, + sync, ): if log_level == "DEBUG": logging.basicConfig( @@ -1580,6 +1725,17 @@ def main( export_resources_to_csv(resource_type, parameter, value, limit) exit() + if bulk_import: + logging.info("Starting bulk import...") + resource_mapping = read_file_in_chunks(json_file, chunk_size, sync) + if sync.lower() == "sort": + build_mapped_payloads(resource_mapping, json_file, resources_count) + end_time = datetime.now() + logging.info("End time: " + end_time.strftime("%H:%M:%S")) + total_time = 
end_time - start_time + logging.info("Total time: " + str(total_time.total_seconds()) + " seconds") + exit() + # set access token if access_token: global global_access_token diff --git a/importer/test_main.py b/importer/test_main.py index 1bda7696..e11fd9eb 100644 --- a/importer/test_main.py +++ b/importer/test_main.py @@ -15,6 +15,8 @@ confirm_keycloak_user, confirm_practitioner, check_parent_admin_level, + split_chunk, + read_file_in_chunks, ) @@ -44,7 +46,9 @@ def test_write_csv(self): ] self.test_resource_type = "test_organization" self.test_fieldnames = ["name", "active", "method", "id", "identifier"] - csv_file = write_csv(self.test_data, self.test_resource_type, self.test_fieldnames) + csv_file = write_csv( + self.test_data, self.test_resource_type, self.test_fieldnames + ) csv_content = read_csv(csv_file) self.assertEqual(csv_content, self.test_data) @@ -103,14 +107,9 @@ def test_build_payload_organizations(self, mock_get_resource): "id": {"const": "3da051e0-d743-5574-8f0e-6cb8798551f5"}, "identifier": {"type": "array", "items": {"type": "object"}}, "active": {"const": "true"}, - "name": {"const": "Min Organization"} + "name": {"const": "Min Organization"}, }, - "required": [ - "id", - "identifier", - "active", - "name" - ], + "required": ["id", "identifier", "active", "name"], } validate(payload_obj["entry"][0]["resource"], resource_schema) @@ -126,7 +125,9 @@ def test_build_payload_organizations(self, mock_get_resource): @patch("main.check_parent_admin_level") @patch("main.get_resource") - def test_build_payload_locations(self, mock_get_resource, mock_check_parent_admin_level): + def test_build_payload_locations( + self, mock_get_resource, mock_check_parent_admin_level + ): mock_get_resource.return_value = "1" mock_check_parent_admin_level.return_value = "3" @@ -190,9 +191,9 @@ def test_build_payload_locations(self, mock_get_resource, mock_check_parent_admi "type": "object", "properties": { "longitude": {"const": 36.81}, - "latitude": {"const": -1.28} - } 
- } + "latitude": {"const": -1.28}, + }, + }, }, "required": [ "resourceType", @@ -236,14 +237,9 @@ def test_build_payload_locations(self, mock_get_resource, mock_check_parent_admi "id": {"const": "c4336f73-4450-566b-b381-d07a6e857d72"}, "identifier": {"type": "array", "items": {"type": "object"}}, "status": {"const": "active"}, - "name": {"const": "City1"} + "name": {"const": "City1"}, }, - "required": [ - "id", - "identifier", - "status", - "name" - ], + "required": ["id", "identifier", "status", "name"], } validate(payload_obj["entry"][0]["resource"], resource_schema) @@ -265,10 +261,7 @@ def test_check_parent_admin_level(self, mock_get_base_url, mock_handle_request): "resourceType": "Location", "id": "18fcbc2e-4240-4a84-a270-7a444523d7b6", "identifier": [ - { - "use": "official", - "value": "18fcbc2e-4240-4a84-a270-7a444523d7b6" - } + {"use": "official", "value": "18fcbc2e-4240-4a84-a270-7a444523d7b6"} ], "status": "active", "name": "test location-1", @@ -278,11 +271,11 @@ def test_check_parent_admin_level(self, mock_get_base_url, mock_handle_request): { "system": "https://smartregister.org/codes/administrative-level", "code": "2", - "display": "Level 2" + "display": "Level 2", } ] } - ] + ], } string_mocked_response_text = json.dumps(mocked_response_text) mock_handle_request.return_value = (string_mocked_response_text, 200) @@ -327,28 +320,32 @@ def test_build_payload_care_teams(self, mock_get_resource): "items": { "type": "object", "properties": { - "system": {"const": "http://snomed.info/sct"}, + "system": { + "const": "http://snomed.info/sct" + }, "code": {"const": "394730007"}, - "display": {"const": "Healthcare related organization"} - } - } + "display": { + "const": "Healthcare related organization" + }, + }, + }, } - } - } + }, + }, }, "member": { "type": "object", "properties": { "reference": {"type": "string"}, - "display": {"type": "string"} - } - } + "display": {"type": "string"}, + }, + }, }, "anyOf": [ {"required": ["role", "member"]}, - 
{"required": ["member"]} - ] - } + {"required": ["member"]}, + ], + }, }, "managingOrganization": { "type": "array", @@ -356,19 +353,20 @@ def test_build_payload_care_teams(self, mock_get_resource): "type": "object", "properties": { "reference": {"type": "string"}, - "display": {"type": "string"} - } - } - } + "display": {"type": "string"}, + }, + }, + }, }, - "required": ["resourceType", - "id", - "identifier", - "status", - "name", - "participant", - "managingOrganization" - ], + "required": [ + "resourceType", + "id", + "identifier", + "status", + "name", + "participant", + "managingOrganization", + ], } validate(payload_obj["entry"][0]["resource"], resource_schema) @@ -391,7 +389,8 @@ def test_build_payload_group(self, mock_get_resource, mock_save_image): csv_file = "csv/import/product.csv" resource_list = read_csv(csv_file) payload = build_payload( - "Group", resource_list, "json_payloads/product_group_payload.json") + "Group", resource_list, "json_payloads/product_group_payload.json" + ) payload_obj = json.loads(payload) self.assertIsInstance(payload_obj, dict) @@ -406,13 +405,9 @@ def test_build_payload_group(self, mock_get_resource, mock_save_image): "identifier": {"type": "array", "items": {"type": "object"}}, "active": {"const": "true"}, "name": {"const": "thermometer"}, - "characteristic": { - "type": "array", - "minItems": 6, - "maxItems": 6 - } + "characteristic": {"type": "array", "minItems": 6, "maxItems": 6}, }, - "required": ["resourceType", "id", "identifier", "active", "name"] + "required": ["resourceType", "id", "identifier", "active", "name"], } validate(payload_obj["entry"][0]["resource"], resource_schema_0) @@ -424,13 +419,9 @@ def test_build_payload_group(self, mock_get_resource, mock_save_image): "identifier": {"type": "array", "items": {"type": "object"}}, "active": {"const": "true"}, "name": {"const": "sterilizer"}, - "characteristic": { - "type": "array", - "minItems": 2, - "maxItems": 2 - } + "characteristic": {"type": "array", 
"minItems": 2, "maxItems": 2}, }, - "required": ["resourceType", "id", "identifier", "active", "name"] + "required": ["resourceType", "id", "identifier", "active", "name"], } validate(payload_obj["entry"][1]["resource"], resource_schema_1) @@ -738,7 +729,9 @@ def test_export_resource_to_csv( @patch("main.handle_request") @patch("main.get_base_url") - def test_build_assign_payload_update_assigned_org(self, mock_get_base_url, mock_handle_request): + def test_build_assign_payload_update_assigned_org( + self, mock_get_base_url, mock_handle_request + ): mock_get_base_url.return_value = "https://example.smartregister.org/fhir" mock_response_data = { "resourceType": "Bundle", @@ -751,22 +744,27 @@ def test_build_assign_payload_update_assigned_org(self, mock_get_base_url, mock_ "meta": {"versionId": "2"}, "practitioner": { "reference": "Practitioner/f5d49ba0-50d7-4491-bd6c-62e429707a03", - "display": "Jenn" + "display": "Jenn", }, "organization": { "reference": "Organization/8342dd77-aecd-48ab-826b-75c7c33039ed", - "display": "Health Organization" - } + "display": "Health Organization", + }, } } - ] + ], } string_response = json.dumps(mock_response_data) mock_response = (string_response, 200) mock_handle_request.return_value = mock_response resource_list = [ - ["Jenn", "f5d49ba0-50d7-4491-bd6c-62e429707a03", "New Org", "98199caa-4455-4b2f-a5cf-cb9c89b6bbdc"] + [ + "Jenn", + "f5d49ba0-50d7-4491-bd6c-62e429707a03", + "New Org", + "98199caa-4455-4b2f-a5cf-cb9c89b6bbdc", + ] ] payload = build_assign_payload(resource_list, "PractitionerRole") payload_obj = json.loads(payload) @@ -777,18 +775,25 @@ def test_build_assign_payload_update_assigned_org(self, mock_get_base_url, mock_ self.assertEqual( payload_obj["entry"][0]["resource"]["practitioner"], - mock_response_data["entry"][0]["resource"]["practitioner"]) + mock_response_data["entry"][0]["resource"]["practitioner"], + ) self.assertNotEqual( payload_obj["entry"][0]["resource"]["organization"], - 
mock_response_data["entry"][0]["resource"]["organization"]) + mock_response_data["entry"][0]["resource"]["organization"], + ) self.assertEqual( payload_obj["entry"][0]["resource"]["organization"]["reference"], - "Organization/98199caa-4455-4b2f-a5cf-cb9c89b6bbdc") - self.assertEqual(payload_obj["entry"][0]["resource"]["organization"]["display"], "New Org") + "Organization/98199caa-4455-4b2f-a5cf-cb9c89b6bbdc", + ) + self.assertEqual( + payload_obj["entry"][0]["resource"]["organization"]["display"], "New Org" + ) @patch("main.handle_request") @patch("main.get_base_url") - def test_build_assign_payload_create_org_assignment(self, mock_get_base_url, mock_handle_request): + def test_build_assign_payload_create_org_assignment( + self, mock_get_base_url, mock_handle_request + ): mock_get_base_url.return_value = "https://example.smartregister.org/fhir" mock_response_data = { "resourceType": "Bundle", @@ -801,18 +806,23 @@ def test_build_assign_payload_create_org_assignment(self, mock_get_base_url, moc "meta": {"versionId": "2"}, "practitioner": { "reference": "Practitioner/f5d49ba0-50d7-4491-bd6c-62e429707a03", - "display": "Jenn" - } + "display": "Jenn", + }, } } - ] + ], } string_response = json.dumps(mock_response_data) mock_response = (string_response, 200) mock_handle_request.return_value = mock_response resource_list = [ - ["Jenn", "f5d49ba0-50d7-4491-bd6c-62e429707a03", "New Org", "98199caa-4455-4b2f-a5cf-cb9c89b6bbdc"] + [ + "Jenn", + "f5d49ba0-50d7-4491-bd6c-62e429707a03", + "New Org", + "98199caa-4455-4b2f-a5cf-cb9c89b6bbdc", + ] ] payload = build_assign_payload(resource_list, "PractitionerRole") payload_obj = json.loads(payload) @@ -823,26 +833,34 @@ def test_build_assign_payload_create_org_assignment(self, mock_get_base_url, moc self.assertEqual( payload_obj["entry"][0]["resource"]["practitioner"], - mock_response_data["entry"][0]["resource"]["practitioner"]) + mock_response_data["entry"][0]["resource"]["practitioner"], + ) self.assertEqual( 
payload_obj["entry"][0]["resource"]["organization"]["reference"], - "Organization/98199caa-4455-4b2f-a5cf-cb9c89b6bbdc") - self.assertEqual(payload_obj["entry"][0]["resource"]["organization"]["display"], "New Org") + "Organization/98199caa-4455-4b2f-a5cf-cb9c89b6bbdc", + ) + self.assertEqual( + payload_obj["entry"][0]["resource"]["organization"]["display"], "New Org" + ) @patch("main.handle_request") @patch("main.get_base_url") - def test_build_assign_payload_create_new_practitioner_role(self, mock_get_base_url, mock_handle_request): + def test_build_assign_payload_create_new_practitioner_role( + self, mock_get_base_url, mock_handle_request + ): mock_get_base_url.return_value = "https://example.smartregister.org/fhir" - mock_response_data = { - "resourceType": "Bundle", - "total": 0 - } + mock_response_data = {"resourceType": "Bundle", "total": 0} string_response = json.dumps(mock_response_data) mock_response = (string_response, 200) mock_handle_request.return_value = mock_response resource_list = [ - ["Jenn", "f5d49ba0-50d7-4491-bd6c-62e429707a03", "New Org", "98199caa-4455-4b2f-a5cf-cb9c89b6bbdc"] + [ + "Jenn", + "f5d49ba0-50d7-4491-bd6c-62e429707a03", + "New Org", + "98199caa-4455-4b2f-a5cf-cb9c89b6bbdc", + ] ] payload = build_assign_payload(resource_list, "PractitionerRole") payload_obj = json.loads(payload) @@ -853,63 +871,112 @@ def test_build_assign_payload_create_new_practitioner_role(self, mock_get_base_u self.assertEqual( payload_obj["entry"][0]["resource"]["practitioner"]["reference"], - "Practitioner/f5d49ba0-50d7-4491-bd6c-62e429707a03") + "Practitioner/f5d49ba0-50d7-4491-bd6c-62e429707a03", + ) self.assertEqual( - payload_obj["entry"][0]["resource"]["practitioner"]["display"], "Jenn") + payload_obj["entry"][0]["resource"]["practitioner"]["display"], "Jenn" + ) self.assertEqual( payload_obj["entry"][0]["resource"]["organization"]["reference"], - "Organization/98199caa-4455-4b2f-a5cf-cb9c89b6bbdc") - 
self.assertEqual(payload_obj["entry"][0]["resource"]["organization"]["display"], "New Org") - - @patch('main.logging') - @patch('main.handle_request') - @patch('main.get_keycloak_url') - def test_create_user(self, mock_get_keycloak_url, mock_handle_request, mock_logging): - mock_get_keycloak_url.return_value = "https://keycloak.smartregister.org/auth/admin/realms/example-realm" + "Organization/98199caa-4455-4b2f-a5cf-cb9c89b6bbdc", + ) + self.assertEqual( + payload_obj["entry"][0]["resource"]["organization"]["display"], "New Org" + ) + + @patch("main.logging") + @patch("main.handle_request") + @patch("main.get_keycloak_url") + def test_create_user( + self, mock_get_keycloak_url, mock_handle_request, mock_logging + ): + mock_get_keycloak_url.return_value = ( + "https://keycloak.smartregister.org/auth/admin/realms/example-realm" + ) mock_handle_request.return_value.status_code = 201 - mock_handle_request.return_value.headers = {"Location": "https://keycloak.smartregister.org/auth/admin/realms" - "/example-realm/users/6cd50351-3ddb-4296-b1db" - "-aac2273e35f3"} + mock_handle_request.return_value.headers = { + "Location": "https://keycloak.smartregister.org/auth/admin/realms" + "/example-realm/users/6cd50351-3ddb-4296-b1db" + "-aac2273e35f3" + } mocked_user_data = ( - 'Jenn', 'Doe', 'Jenny', 'jeendoe@example.com', '431cb523-253f-4c44-9ded-af42c55c0bbb', 'Supervisor', 'TRUE', - 'a715b562-27f2-432a-b1ba-e57db35e0f93', 'test', 'demo', 'pa$$word' + "Jenn", + "Doe", + "Jenny", + "jeendoe@example.com", + "431cb523-253f-4c44-9ded-af42c55c0bbb", + "Supervisor", + "TRUE", + "a715b562-27f2-432a-b1ba-e57db35e0f93", + "test", + "demo", + "pa$$word", ) user_id = create_user(mocked_user_data) self.assertEqual(user_id, "6cd50351-3ddb-4296-b1db-aac2273e35f3") - mock_logging.info.assert_called_with('Setting user password') + mock_logging.info.assert_called_with("Setting user password") - @patch('main.handle_request') - @patch('main.get_keycloak_url') - def 
test_create_user_already_exists(self, mock_get_keycloak_url, mock_handle_request): - mock_get_keycloak_url.return_value = "https://keycloak.smartregister.org/auth/admin/realms/example-realm" + @patch("main.handle_request") + @patch("main.get_keycloak_url") + def test_create_user_already_exists( + self, mock_get_keycloak_url, mock_handle_request + ): + mock_get_keycloak_url.return_value = ( + "https://keycloak.smartregister.org/auth/admin/realms/example-realm" + ) mock_handle_request.return_value.status_code = 409 mocked_user_data = ( - 'Jenn', 'Doe', 'Jenn', 'jendoe@example.com', ' 99d54e3c-c26f-4500-a7f9-3f4cb788673f', 'Supervisor', 'false', - 'a715b562-27f2-432a-b1ba-e57db35e0f93', 'test', 'demo', 'pa$$word' + "Jenn", + "Doe", + "Jenn", + "jendoe@example.com", + " 99d54e3c-c26f-4500-a7f9-3f4cb788673f", + "Supervisor", + "false", + "a715b562-27f2-432a-b1ba-e57db35e0f93", + "test", + "demo", + "pa$$word", ) user_id = create_user(mocked_user_data) self.assertEqual(user_id, 0) # Test the confirm_keycloak function - @patch('main.logging') - @patch('main.handle_request') - @patch('main.get_keycloak_url') - def test_confirm_keycloak_user(self, mock_get_keycloak_url, mock_handle_request, mock_logging): - mock_get_keycloak_url.return_value = "https://keycloak.smartregister.org/auth/admin/realms/example-realm" + @patch("main.logging") + @patch("main.handle_request") + @patch("main.get_keycloak_url") + def test_confirm_keycloak_user( + self, mock_get_keycloak_url, mock_handle_request, mock_logging + ): + mock_get_keycloak_url.return_value = ( + "https://keycloak.smartregister.org/auth/admin/realms/example-realm" + ) mocked_user_data = ( - 'Jenn', 'Doe', 'Jenny', 'jeendoe@example.com', '431cb523-253f-4c44-9ded-af42c55c0bbb', 'Supervisor', 'TRUE', - 'a715b562-27f2-432a-b1ba-e57db35e0f93', 'test', 'demo', 'pa$$word' + "Jenn", + "Doe", + "Jenny", + "jeendoe@example.com", + "431cb523-253f-4c44-9ded-af42c55c0bbb", + "Supervisor", + "TRUE", + 
"a715b562-27f2-432a-b1ba-e57db35e0f93", + "test", + "demo", + "pa$$word", ) user_id = create_user(mocked_user_data) self.assertEqual(user_id, 0) - mock_response = ('[{"id":"6cd50351-3ddb-4296-b1db-aac2273e35f3","createdTimestamp":1710151827166,' - '"username":"Jenny","enabled":true,"totp":false,"emailVerified":false,"firstName":"Jenn",' - '"lastName":"Doe","email":"jeendoe@example.com","attributes":{"fhir_core_app_id":["demo"]},' - '"disableableCredentialTypes":[],"requiredActions":[],"notBefore":0,"access":{' - '"manageGroupMembership":true,"view":true,"mapRoles":true,"impersonate":true,' - '"manage":true}}]', 200) + mock_response = ( + '[{"id":"6cd50351-3ddb-4296-b1db-aac2273e35f3","createdTimestamp":1710151827166,' + '"username":"Jenny","enabled":true,"totp":false,"emailVerified":false,"firstName":"Jenn",' + '"lastName":"Doe","email":"jeendoe@example.com","attributes":{"fhir_core_app_id":["demo"]},' + '"disableableCredentialTypes":[],"requiredActions":[],"notBefore":0,"access":{' + '"manageGroupMembership":true,"view":true,"mapRoles":true,"impersonate":true,' + '"manage":true}}]', + 200, + ) mock_handle_request.return_value = mock_response mock_json_response = json.loads(mock_response[0]) keycloak_id = confirm_keycloak_user(mocked_user_data) @@ -919,13 +986,24 @@ def test_confirm_keycloak_user(self, mock_get_keycloak_url, mock_handle_request, mock_logging.info.assert_called_with("User confirmed with id: " + keycloak_id) # Test confirm_practitioner function - @patch('main.handle_request') - @patch('main.get_base_url') - def test_confirm_practitioner_if_practitioner_uuid_not_provided(self, mock_get_base_url, mock_handle_request): - mock_get_base_url.return_value = 'https://example.smartregister.org/fhir' + @patch("main.handle_request") + @patch("main.get_base_url") + def test_confirm_practitioner_if_practitioner_uuid_not_provided( + self, mock_get_base_url, mock_handle_request + ): + mock_get_base_url.return_value = "https://example.smartregister.org/fhir" 
mocked_user = ( - 'Jenn', 'Doe', 'Jenny', 'jeendoe@example.com', '', 'Supervisor', 'TRUE', - 'a715b562-27f2-432a-b1ba-e57db35e0f93', 'test', 'demo', 'pa$$word' + "Jenn", + "Doe", + "Jenny", + "jeendoe@example.com", + "", + "Supervisor", + "TRUE", + "a715b562-27f2-432a-b1ba-e57db35e0f93", + "test", + "demo", + "pa$$word", ) mocked_response_data = { "resourceType": "Bundle", @@ -935,44 +1013,70 @@ def test_confirm_practitioner_if_practitioner_uuid_not_provided(self, mock_get_b string_response = json.dumps(mocked_response_data) mock_response = (string_response, 200) mock_handle_request.return_value = mock_response - practitioner_exists = confirm_practitioner(mocked_user, "431cb523-253f-4c44-9ded-af42c55c0bbb") - self.assertTrue(practitioner_exists, "Practitioner exist, linked to the provided user") - - @patch('main.logging') - @patch('main.handle_request') - @patch('main.get_base_url') - def test_confirm_practitioner_linked_keycloak_user_and_practitioner(self, mock_get_base_url, mock_handle_request, - mock_logging): - mock_get_base_url.return_value = 'https://example.smartregister.org/fhir' + practitioner_exists = confirm_practitioner( + mocked_user, "431cb523-253f-4c44-9ded-af42c55c0bbb" + ) + self.assertTrue( + practitioner_exists, "Practitioner exist, linked to the provided user" + ) + + @patch("main.logging") + @patch("main.handle_request") + @patch("main.get_base_url") + def test_confirm_practitioner_linked_keycloak_user_and_practitioner( + self, mock_get_base_url, mock_handle_request, mock_logging + ): + mock_get_base_url.return_value = "https://example.smartregister.org/fhir" mocked_user = ( - 'Jenn', 'Doe', 'Jenny', 'jeendoe@example.com', '6cd50351-3ddb-4296-b1db-aac2273e35f3', 'Supervisor', 'TRUE', - 'a715b562-27f2-432a-b1ba-e57db35e0f93', 'test', 'demo', 'pa$$word' + "Jenn", + "Doe", + "Jenny", + "jeendoe@example.com", + "6cd50351-3ddb-4296-b1db-aac2273e35f3", + "Supervisor", + "TRUE", + "a715b562-27f2-432a-b1ba-e57db35e0f93", + "test", + "demo", + 
"pa$$word", ) mocked_response_data = { "resourceType": "Practitioner", "identifier": [ - { - "use": "official", - "value": "431cb523-253f-4c44-9ded-af42c55c0bbb" - }, - { - "use": "secondary", - "value": "6cd50351-3ddb-4296-b1db-aac2273e35f3" - } + {"use": "official", "value": "431cb523-253f-4c44-9ded-af42c55c0bbb"}, + {"use": "secondary", "value": "6cd50351-3ddb-4296-b1db-aac2273e35f3"}, ], } string_response = json.dumps(mocked_response_data) mock_response = (string_response, 200) mock_handle_request.return_value = mock_response - practitioner_exists = confirm_practitioner(mocked_user, "6cd50351-3ddb-4296-b1db-aac2273e35f3") + practitioner_exists = confirm_practitioner( + mocked_user, "6cd50351-3ddb-4296-b1db-aac2273e35f3" + ) self.assertTrue(practitioner_exists) - self.assertEqual(mocked_response_data["identifier"][1]["value"], "6cd50351-3ddb-4296-b1db-aac2273e35f3") - mock_logging.info.assert_called_with("The Keycloak user and Practitioner are linked as expected") + self.assertEqual( + mocked_response_data["identifier"][1]["value"], + "6cd50351-3ddb-4296-b1db-aac2273e35f3", + ) + mock_logging.info.assert_called_with( + "The Keycloak user and Practitioner are linked as expected" + ) # Test create_user_resources function def test_create_user_resources(self): - user = ('Jenn', 'Doe', 'Jenn', 'jendoe@example.com', '99d54e3c-c26f-4500-a7f9-3f4cb788673f', 'Supervisor', - 'false', 'a715b562-27f2-432a-b1ba-e57db35e0f93', 'test', 'demo', 'pa$$word') + user = ( + "Jenn", + "Doe", + "Jenn", + "jendoe@example.com", + "99d54e3c-c26f-4500-a7f9-3f4cb788673f", + "Supervisor", + "false", + "a715b562-27f2-432a-b1ba-e57db35e0f93", + "test", + "demo", + "pa$$word", + ) user_id = "99d54e3c-c26f-4500-a7f9-3f4cb788673f" payload = create_user_resources(user_id, user) payload_obj = json.loads(payload) @@ -992,7 +1096,7 @@ def test_create_user_resources(self): "properties": { "use": { "type": "string", - "enum": ["official", "secondary"] + "enum": ["official", "secondary"], }, "type": { 
"type": "object", @@ -1002,18 +1106,22 @@ def test_create_user_resources(self): "items": { "type": "object", "properties": { - "system": {"const": "http://hl7.org/fhir/identifier-type"}, + "system": { + "const": "http://hl7.org/fhir/identifier-type" + }, "code": {"const": "KUID"}, - "display": {"const": "Keycloak user ID"} - } - } + "display": { + "const": "Keycloak user ID" + }, + }, + }, }, - "text": {"const": "Keycloak user ID"} - } + "text": {"const": "Keycloak user ID"}, + }, }, - "value": {"const": "99d54e3c-c26f-4500-a7f9-3f4cb788673f"} - } - } + "value": {"const": "99d54e3c-c26f-4500-a7f9-3f4cb788673f"}, + }, + }, }, "name": { "type": "array", @@ -1022,14 +1130,10 @@ def test_create_user_resources(self): "properties": { "use": {"const": "official"}, "family": {"const": "Doe"}, - "given": { - "type": "array", - "items": { - "type": "string" - } - } - } - }}, + "given": {"type": "array", "items": {"type": "string"}}, + }, + }, + }, }, "required": ["resourceType", "id", "identifier", "name"], } @@ -1060,12 +1164,14 @@ def test_create_user_resources(self): "entity": { "type": "object", "properties": { - "reference": {"const": "Practitioner/99d54e3c-c26f-4500-a7f9-3f4cb788673f"} - } + "reference": { + "const": "Practitioner/99d54e3c-c26f-4500-a7f9-3f4cb788673f" + } + }, } - } - } - } + }, + }, + }, }, "required": ["resourceType", "id", "identifier", "name", "member"], } @@ -1090,9 +1196,11 @@ def test_create_user_resources(self): "practitioner": { "type": "object", "properties": { - "reference": {"const": "Practitioner/99d54e3c-c26f-4500-a7f9-3f4cb788673f"}, - "display": {"const": "Jenn Doe"} - } + "reference": { + "const": "Practitioner/99d54e3c-c26f-4500-a7f9-3f4cb788673f" + }, + "display": {"const": "Jenn Doe"}, + }, }, "code": { "type": "object", @@ -1104,12 +1212,12 @@ def test_create_user_resources(self): "properties": { "system": {"const": "http://snomed.info/sct"}, "code": {"const": "236321002"}, - "display": {"const": "Supervisor (occupation)"} + 
"display": {"const": "Supervisor (occupation)"}, }, - } + }, } - } - } + }, + }, }, "required": ["resourceType", "id", "identifier", "practitioner", "code"], } @@ -1119,12 +1227,100 @@ def test_create_user_resources(self): "type": "object", "properties": { "method": {"const": "PUT"}, - "url": {"const": "PractitionerRole/f08e0373-932e-5bcb-bdf2-0c28a3c8fdd3"}, + "url": { + "const": "PractitionerRole/f08e0373-932e-5bcb-bdf2-0c28a3c8fdd3" + }, "ifMatch": {"const": "1"}, }, } validate(payload_obj["entry"][2]["request"], request_schema) + @patch("main.set_resource_list") + def test_split_chunk_direct_sync_first_chunk_less_than_size( + self, mock_set_resource_list + ): + chunk = '[{"id": "10", "resourceType": "Patient"}' + next_left_over = split_chunk(chunk, "", 50, {}, "direct") + chunk_list = '[{"id": "10", "resourceType": "Patient"}]' + self.assertEqual(next_left_over, "-") + mock_set_resource_list.assert_called_once_with(chunk_list) + + @patch("main.set_resource_list") + def test_split_chunk_direct_sync_middle_chunk_less_than_size( + self, mock_set_resource_list + ): + chunk = ' "resourceType": "Patient"}' + left_over_chunk = '{"id": "10",' + next_left_over = split_chunk(chunk, left_over_chunk, 50, {}, "direct") + chunk_list = '[{"id": "10", "resourceType": "Patient"}]' + self.assertEqual(next_left_over, "-") + mock_set_resource_list.assert_called_once_with(chunk_list) + + @patch("main.set_resource_list") + def test_split_chunk_direct_sync_last_chunk_less_than_size( + self, mock_set_resource_list + ): + left_over_chunk = '{"id": "10", "resourceType": "Patient"}]' + next_left_over = split_chunk("", left_over_chunk, 50, {}, "direct") + chunk_list = '[{"id": "10", "resourceType": "Patient"}]' + self.assertEqual(next_left_over, "-") + mock_set_resource_list.assert_called_once_with(chunk_list) + + @patch("main.set_resource_list") + def test_split_chunk_direct_sync_first_chunk_greater_than_size( + self, mock_set_resource_list + ): + chunk = '[{"id": "10", "resourceType": 
"Patient"},{"id": "11", "resourceType":' + next_left_over = split_chunk(chunk, "", 40, {}, "direct") + chunk_list = '[{"id": "10", "resourceType": "Patient"}]' + self.assertEqual(next_left_over, '{"id": "11", "resourceType":') + mock_set_resource_list.assert_called_once_with(chunk_list) + + @patch("main.set_resource_list") + def test_split_chunk_direct_sync_middle_chunk_greater_than_size( + self, mock_set_resource_list + ): + chunk = ': "Task"},{"id": "10", "resourceType": "Patient"},{"id": "11", "resourceType":' + left_over_chunk = '{"id": "09", "resourceType"' + next_left_over = split_chunk(chunk, left_over_chunk, 80, {}, "direct") + chunk_list = '[{"id": "09", "resourceType": "Task"},{"id": "10", "resourceType": "Patient"}]' + self.assertEqual(next_left_over, '{"id": "11", "resourceType":') + mock_set_resource_list.assert_called_once_with(chunk_list) + + @patch("main.set_resource_list") + def test_split_chunk_direct_sync_last_chunk_greater_than_size( + self, mock_set_resource_list + ): + left_over_chunk = '{"id": "10", "resourceType": "Patient"},{"id": "11", "resourceType": "Task"}]' + next_left_over = split_chunk("", left_over_chunk, 43, {}, "direct") + chunk_list = '[{"id": "10", "resourceType": "Patient"},{"id": "11", "resourceType": "Task"}]' + self.assertEqual(next_left_over, "") + mock_set_resource_list.assert_called_once_with(chunk_list) + + @patch("main.set_resource_list") + @patch("main.build_resource_type_map") + def test_split_chunk_sort_sync_first_chunk_less_than_size( + self, mock_build_resource_type_map, mock_set_resource_list + ): + chunk = '[{"id": "10", "resourceType": "Patient"},{"id": "11"' + next_left_over = split_chunk(chunk, "", 50, {}, "sort") + chunk_list = '[{"id": "10", "resourceType": "Patient"}]' + self.assertEqual(next_left_over, '{"id": "11"') + mock_set_resource_list.assert_not_called() + mock_build_resource_type_map.assert_called_once_with(chunk_list, {}, 0) + + def test_build_resource_type_map(self): + json_file = 
"tests/json/sample.json" + mapping = read_file_in_chunks(json_file, 300, "sort") + mapped_resources = { + "Patient": [0], + "Practitioner": [1, 5], + "Location": [2, 4], + "Observation": [3], + } + self.assertIsInstance(mapping, dict) + self.assertEqual(mapping, mapped_resources) + if __name__ == "__main__": unittest.main() diff --git a/importer/tests/json/sample.json b/importer/tests/json/sample.json new file mode 100644 index 00000000..bdbf0305 --- /dev/null +++ b/importer/tests/json/sample.json @@ -0,0 +1 @@ +[{"id":"d41204ad-8284-4131-b21e-d200d191ff3e","resourceType":"Patient","active":true,"name":[{"family":"Brown"}],"gender":"male","birthDate":"1995-07-01"},{"id":"878d85b3-2856-4568-9429-346fa054de46","resourceType":"Practitioner","active":true,"name":[{"use":"official","family":"Doe"}]},{"id":"3a536e47-ae09-463f-9df2-64221db82c96","resourceType":"Location","status":"active","name":"Nairobi"},{"id":"b3088bb3-dbcc-4019-92a8-2ca93682b63b","resourceType":"Observation","code":{"coding":[{"system":"https://www.snomed.org","code":"75753009","display":"Blood clots"}]}},{"id":"1a95c0b2-f903-4f32-be4f-6270ef604c5f","resourceType":"Location","status":"active","name":"Mombasa"},{"id":"62a2755b-5136-4bc0-882a-16aafa1c2083","resourceType":"Practitioner","active":true,"name":[{"use":"official","family":"Doe"}]}] \ No newline at end of file