Data endpoint enhancements (#2477)
* add endpoint /api/v1/forms/<form_id>/regenerate-submission-metadata

add an endpoint to force a JSON update for all submissions under the current form

* add async task to regenerate json for form instances

refactor the /api/v1/forms/<form_id>/regenerate-submission-metadata endpoint to only trigger the async task if the task has failed or does not exist in the cache (see the sketch below)
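
A minimal sketch of the check described above (the viewset code itself is not shown in this diff, and the helper name is assumed):

from celery.result import AsyncResult
from django.core.cache import cache

from onadata.apps.api.tasks import regenerate_form_instance_json
from onadata.libs.utils.cache_tools import XFORM_REGENERATE_INSTANCE_JSON_TASK


def trigger_regeneration(xform_id: int) -> str:
    """Queue the async regeneration unless a non-failed task is already cached."""
    cache_key = f"{XFORM_REGENERATE_INSTANCE_JSON_TASK}{xform_id}"
    task_id = cache.get(cache_key)

    if task_id and AsyncResult(task_id).state != "FAILURE":
        # A task id is cached and the task has not failed; do not enqueue another
        return task_id

    result = regenerate_form_instance_json.delay(xform_id)
    cache.set(cache_key, result.task_id)
    return result.task_id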

* update method docstring

* update method docstring

* update docstring

* update docstring

* update doc strings

* update code comment

* update doc string

* update comment

* refactor code

* update doc string

* update doc string

* update docstring

* update doc string

* update test case

* update documentation

* format documentation

* fix pagination link header missing in /api/v1/data/<form_id> when sorting

when the sort query parameter is applied with pagination, the pagination Link header was missing (see the sketch below)
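
For illustration, a hypothetical client request that exercises the sorted, paginated listing whose Link header this fix restores (URL, form id and token are placeholders):

import requests

response = requests.get(
    "https://example.com/api/v1/data/1",  # the form id 1 is a placeholder
    params={"page": 1, "page_size": 2, "sort": '{"date_modified":-1}'},
    headers={"Authorization": "Token <api-token>"},
)
# Before the fix, the Link header was absent whenever "sort" was supplied
print(response.headers.get("Link"))
# e.g. <https://example.com/api/v1/data/1?page=2&page_size=2>; rel="next"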

* update documentation

add documentation for endpoint /api/v1/forms/{pk}/regenerate-submission-metadata

* update documentation

* fix wrongly formatted link in docs

* fix linting error

fix error pycodestyle: E501 / line too long (90 > 88 characters) (col 89)

* fix failing tests

* fix linting error

fix unused argument self

* re-revert 0 float value converted to int accidentally during save

* re-add decimal precision accidentally removed during save

* fix failing tests

* fix failing tests

* fix failing tests

* fix failing test

* fix failing tests

* address lint error

address unused-argument, wrong-import-order

* fix bug: argument after * must be an iterable

* fix Instance model json _date_modified, _submission_time out of sync

Instance model date_modified and date_created fields were out of sync with their aliases in the json field (see the sketch below)
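
A minimal sketch of keeping the json aliases in step with the model fields, assuming the alias keys used elsewhere in this commit (the helper name is illustrative):

def sync_json_timestamps(instance):
    """Mirror date_created/date_modified into their aliases in the json field."""
    instance.json["_submission_time"] = instance.date_created.isoformat()
    instance.json["_date_modified"] = instance.date_modified.isoformat()
    return instance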

* fix typo

* fix failing test case

* fix failing tests

* refactor code and fix failing tests

* remove redundant method call

* refactor code

* address lint error

address invalid-name, missing-function-docstring

* update docstring

* refactor code

* refactor code

* fix failing tests

* fix typo and refactor code

* remove json getter in model Instance

there is already a json field present that holds the result the getter was recalculating

* re-add the removed getter method

* use helper method to read async task state

* refactor code

* do not set json when regenerating json asynchronously

json is set in the post_save signal, so setting it explicitly is unnecessary (see the sketch below)
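
A rough illustration of the post_save behaviour being relied on, assuming a receiver of this shape (the receiver name is hypothetical; the real handler is not shown in this diff):

from django.db.models.signals import post_save
from django.dispatch import receiver

from onadata.apps.logger.models import Instance


@receiver(post_save, sender=Instance)
def set_instance_json(sender, instance, **kwargs):
    # Recompute the denormalised json after every save; QuerySet.update
    # is used so that save() and its signals are not triggered again
    Instance.objects.filter(pk=instance.pk).update(json=instance.get_full_dict())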

* refactor code

* update comment

* fix bug: generator object has no attribute count

the bug appears when pagination is used with the sort and query query parameters (see the sketch below)
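
A minimal sketch of the failure mode and a defensive count, with illustrative names:

def record_count(records):
    """Count records whether they arrive as a QuerySet or a plain generator."""
    if hasattr(records, "count"):
        return records.count()  # QuerySet path
    # A generator (produced when sort and query are combined) has no .count()
    return len(list(records))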

* convert endpoint /api/v1/forms/<form_id>/regenerate-submission-metadata into command

convert the endpoint into a custom Django management command (see the sketch below)
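
A rough sketch of what such a management command could look like (the command name and argument are assumed; the real command is not shown in this diff):

from django.core.management.base import BaseCommand, CommandError

from onadata.apps.api.tasks import regenerate_form_instance_json
from onadata.apps.logger.models import XForm


class Command(BaseCommand):
    """Regenerate the JSON of all submissions for a form."""

    help = "Regenerate submission JSON for a form"

    def add_arguments(self, parser):
        parser.add_argument("form_id", type=int)

    def handle(self, *args, **options):
        form_id = options["form_id"]
        if not XForm.objects.filter(pk=form_id).exists():
            raise CommandError(f"XForm {form_id} does not exist")
        regenerate_form_instance_json.delay(form_id)
        self.stdout.write(self.style.SUCCESS("Regeneration task queued"))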

* handle edge case in regenerate_form_instance_json async task

ensure we do not regenerate instance json if it has already been regenerated

* address lint error

consider-using-f-string / Formatting a regular string which could be an f-string

* address lint error

address raise-missing-from, consider-using-f-string
update docstring

* address lint errors

* address lint error

* address lint errors

* fix incorrect lint error suppression

* suppress lint error
kelvin-muchiri authored Sep 28, 2023
1 parent e98a41a commit 7eee153
Showing 36 changed files with 780 additions and 366 deletions.
7 changes: 6 additions & 1 deletion docs/data.rst
@@ -570,7 +570,12 @@ Response

Query submitted data of a specific form
----------------------------------------
Use the `query` or `data` parameter to pass in a JSON key/value query.

When querying a datetime field whose value is in ISO format, such as ``2020-12-18T09:36:19.767455+00:00``, it is important to ensure the ``+`` (plus) is encoded as ``%2b``.

An unencoded ``+`` is parsed as whitespace, so ``2020-12-18T09:36:19.767455+00:00`` should be converted to ``2020-12-18T09:36:19.767455%2b00:00``.
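
As a quick illustration of the encoding note above, a small Python snippet (the query key is only an example):

from urllib.parse import quote

timestamp = "2020-12-18T09:36:19.767455+00:00"
encoded = quote(timestamp, safe=":")  # "+" becomes "%2B"; ":" is kept as-is
query = '{"_date_modified": {"$gte": "%s"}}' % encoded
print(encoded)  # 2020-12-18T09:36:19.767455%2B00:00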


Example I
^^^^^^^^^
2 changes: 1 addition & 1 deletion docs/forms.rst
@@ -1627,4 +1627,4 @@ If the upload is still running:
HTTP 202 Accepted
{
"job_status": "PENDING"
}
37 changes: 37 additions & 0 deletions onadata/apps/api/tasks.py
@@ -18,10 +18,15 @@
from onadata.apps.api import tools
from onadata.libs.utils.email import send_generic_email
from onadata.libs.utils.model_tools import queryset_iterator
from onadata.libs.utils.cache_tools import (
safe_delete,
XFORM_REGENERATE_INSTANCE_JSON_TASK,
)
from onadata.apps.logger.models import Instance, ProjectInvitation, XForm
from onadata.libs.utils.email import ProjectInvitationEmail
from onadata.celeryapp import app


User = get_user_model()


@@ -145,3 +150,35 @@ def send_project_invitation_email_async(
else:
email = ProjectInvitationEmail(invitation, url)
email.send()


@app.task(track_started=True)
def regenerate_form_instance_json(xform_id: int):
"""Regenerate a form's instances json
Json data recreated afresh and any existing json data is overriden
"""
try:
xform: XForm = XForm.objects.get(pk=xform_id)
except XForm.DoesNotExist as err:
logging.exception(err)

else:
if not xform.is_instance_json_regenerated:
instances = xform.instances.filter(deleted_at__isnull=True)

for instance in queryset_iterator(instances):
# We do not want to trigger Model.save or any signals.
# QuerySet.update is a workaround to achieve this:
# Instance.save and the pre/post-save signals may have
# side effects we are not interested in, e.g. updating
# date_modified, which we do not want here.
Instance.objects.filter(pk=instance.pk).update(
json=instance.get_full_dict()
)

xform.is_instance_json_regenerated = True
xform.save()
# Clear cache used to store the task id from the AsyncResult
cache_key = f"{XFORM_REGENERATE_INSTANCE_JSON_TASK}{xform_id}"
safe_delete(cache_key)
2 changes: 1 addition & 1 deletion onadata/apps/api/tests/fixtures/osm/osm.csv
@@ -1,2 +1,2 @@
photo,osm_road,osm_building,fav_color,form_completed,meta/instanceID,_uuid,_submission_time,_tags,_notes,_version,_duration,_submitted_by,_total_media,_media_count,_media_all_received,osm_road:ctr:lat,osm_road:ctr:lon,osm_road:highway,osm_road:lanes,osm_road:name,osm_road:way:id,osm_building:building,osm_building:building:levels,osm_building:ctr:lat,osm_building:ctr:lon,osm_building:name,osm_building:way:id
1424308569120.jpg,OSMWay234134797.osm,OSMWay34298972.osm,red,2015-02-19T04:18:21.427+03,uuid:d3ef929e-e3e7-456c-9f27-7679c0074f4f,d3ef929e-e3e7-456c-9f27-7679c0074f4f,2013-02-18T15:54:01,,,201511091147,,bob,3,2,False,23.708174238006087,90.40946505581161,tertiary,2,Patuatuli Road,234134797,yes,4,23.707316084046038,90.40849938337506,kol,34298972
1424308569120.jpg,OSMWay234134797.osm,OSMWay34298972.osm,red,2015-02-19T04:18:21.427+03,uuid:d3ef929e-e3e7-456c-9f27-7679c0074f4f,d3ef929e-e3e7-456c-9f27-7679c0074f4f,2013-02-18T15:54:01+00:00,,,201511091147,,bob,3,2,False,23.708174238006087,90.40946505581161,tertiary,2,Patuatuli Road,234134797,yes,4,23.707316084046038,90.40849938337506,kol,34298972
73 changes: 72 additions & 1 deletion onadata/apps/api/tests/test_tasks.py
@@ -1,12 +1,16 @@
"""Tests for module onadata.apps.api.tasks"""
import sys

from unittest.mock import patch

from django.core.cache import cache

from onadata.apps.main.tests.test_base import TestBase
from onadata.apps.api.tasks import (
send_project_invitation_email_async,
regenerate_form_instance_json,
)
from onadata.apps.logger.models import ProjectInvitation
from onadata.apps.logger.models import ProjectInvitation, Instance
from onadata.libs.utils.user_auth import get_user_default_project
from onadata.libs.utils.email import ProjectInvitationEmail

@@ -30,3 +34,70 @@ def test_sends_email(self, mock_send):
url = "https://example.com/register"
send_project_invitation_email_async(self.invitation.id, url)
mock_send.assert_called_once()


class RegenerateFormInstanceJsonTestCase(TestBase):
"""Tests for regenerate_form_instance_json"""

def test_regenerates_instances_json(self):
"""Regenerates instances json"""

def mock_get_full_dict(self): # pylint: disable=unused-argument
return {}

with patch.object(Instance, "get_full_dict", mock_get_full_dict):
self._publish_transportation_form_and_submit_instance()

cache_key = f"xfm-regenerate_instance_json_task-{self.xform.pk}"
cache.set(cache_key, "foo")
instance = self.xform.instances.first()
self.assertFalse(instance.json)
self.assertFalse(self.xform.is_instance_json_regenerated)
regenerate_form_instance_json.delay(self.xform.pk)
instance.refresh_from_db()
self.assertTrue(instance.json)
self.xform.refresh_from_db()
self.assertTrue(self.xform.is_instance_json_regenerated)
# task_id stored in cache should be deleted
self.assertIsNone(cache.get(cache_key))

def test_json_overriden(self):
"""Existing json is overriden"""

def mock_get_full_dict(self): # pylint: disable=unused-argument
return {"foo": "bar"}

with patch.object(Instance, "get_full_dict", mock_get_full_dict):
self._publish_transportation_form_and_submit_instance()

instance = self.xform.instances.first()
self.assertEqual(instance.json.get("foo"), "bar")
regenerate_form_instance_json.delay(self.xform.pk)
instance.refresh_from_db()
self.assertFalse("foo" in instance.json)

@patch("logging.exception")
def test_form_id_invalid(self, mock_log_exception):
"""An invalid xform_id is handled"""

regenerate_form_instance_json.delay(sys.maxsize)

mock_log_exception.assert_called_once()

def test_already_generated(self):
"""Regeneration fails for a form whose regeneration has already been done"""

def mock_get_full_dict(self): # pylint: disable=unused-argument
return {}

with patch.object(Instance, "get_full_dict", mock_get_full_dict):
self._publish_transportation_form_and_submit_instance()

self.xform.is_instance_json_regenerated = True
self.xform.save()
instance = self.xform.instances.first()
self.assertFalse(instance.json)
self.assertTrue(self.xform.is_instance_json_regenerated)
regenerate_form_instance_json.delay(self.xform.pk)
instance.refresh_from_db()
self.assertFalse(instance.json)
130 changes: 89 additions & 41 deletions onadata/apps/api/tests/viewsets/test_data_viewset.py
@@ -8,6 +8,7 @@
import json
import logging
import os
import pytz
import csv
from io import StringIO
from builtins import open
@@ -29,7 +30,7 @@
from django_digest.test import DigestAuth
from flaky import flaky
from httmock import HTTMock, urlmatch
from mock import patch
from mock import patch, Mock

from onadata.apps.api.tests.viewsets.test_abstract_viewset import (
TestAbstractViewSet,
@@ -606,7 +607,7 @@ def test_data_pagination(self):
self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.data), 4)

# Query param returns correct pagination headers
# Pagination works with "query" query parameter
request = self.factory.get(
"/", data={"page_size": "1", "query": "ambulance"}, **self.extra
)
@@ -616,6 +617,48 @@
self.assertEqual(
response["Link"], ('<http://testserver/?page=2&page_size=1>; rel="next"')
)
self.assertEqual(len(response.data), 1)
# Pagination works with "sort" query parametr
instances = self.xform.instances.all().order_by("-date_modified")
self.assertEqual(instances.count(), 4)
request = self.factory.get(
"/",
data={"page": "1", "page_size": "2", "sort": '{"date_modified":-1}'},
**self.extra,
)
response = view(request, pk=formid)
self.assertEqual(response.status_code, 200)
self.assertIn("Link", response)
self.assertEqual(
response["Link"], ('<http://testserver/?page=2&page_size=2>; rel="next"')
)
self.assertEqual(len(response.data), 2)
self.assertEqual(response.data[0]["_id"], instances[0].pk)
# Pagination works with multiple query params
instances = (
self.xform.instances.all()
.order_by("-date_modified")
.extra(where=["json::text ~* cast(%s as text)"], params=["ambulance"])
)
self.assertEqual(instances.count(), 2)
request = self.factory.get(
"/",
data={
"page": "1",
"page_size": "1",
"sort": '{"date_modified":-1}',
"query": "ambulance",
},
**self.extra,
)
response = view(request, pk=formid)
self.assertEqual(response.status_code, 200)
self.assertIn("Link", response)
self.assertEqual(
response["Link"], ('<http://testserver/?page=2&page_size=1>; rel="next"')
)
self.assertEqual(len(response.data), 1)
self.assertEqual(response.data[0]["_id"], instances[0].pk)

def test_sort_query_param_with_invalid_values(self):
self._make_submissions()
@@ -1070,27 +1113,27 @@ def test_filter_by_date_modified(self):
self._make_submissions()
view = DataViewSet.as_view({"get": "list"})
request = self.factory.get("/", **self.extra)
formid = self.xform.pk
instance = self.xform.instances.all().order_by("pk")[0]
response = view(request, pk=formid)
self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.data), 4)

instance = self.xform.instances.all().order_by("-date_created")[0]
instances = self.xform.instances.all().order_by("pk")
self.assertEqual(len(instances), 4)
instance = instances[2]
date_modified = instance.date_modified.isoformat()

query_str = '{"_date_modified": {"$gte": "%s"},' ' "_submitted_by": "%s"}' % (
date_modified,
"bob",
)
# greater than or equal to
query_str = '{"_date_modified": {"$gte": "%s"}}' % date_modified
data = {"query": query_str}
request = self.factory.get("/", data=data, **self.extra)
response = view(request, pk=formid)
response = view(request, pk=self.xform.pk)
self.assertEqual(response.status_code, 200)
expected_count = self.xform.instances.filter(
date_modified__gte=date_modified
).count()
self.assertEqual(len(response.data), expected_count)
self.assertEqual(len(response.data), 2)
self.assertEqual(response.data[0]["_id"], instances[2].pk)
self.assertEqual(response.data[1]["_id"], instances[3].pk)
# greater than
query_str = '{"_date_modified": {"$gt": "%s"}}' % date_modified
data = {"query": query_str}
request = self.factory.get("/", data=data, **self.extra)
response = view(request, pk=self.xform.pk)
self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.data), 1)
self.assertEqual(response.data[0]["_id"], instances[3].pk)

def test_filter_by_submission_time_and_submitted_by_with_data_arg(self):
self._make_submissions()
@@ -2339,7 +2382,8 @@ def test_geotraces_in_repeats(self):
| | end repeat |
"""
self.xform = self._publish_markdown(
md, self.user, self.project, id_string="geotraces")
md, self.user, self.project, id_string="geotraces"
)
# publish submissions
self._publish_submit_geoms_in_repeats("Geotraces")
view = DataViewSet.as_view({"get": "list"})
@@ -2400,7 +2444,8 @@ def test_geoshapes_in_repeats(self):
| | end repeat |
"""
self.xform = self._publish_markdown(
md, self.user, self.project, id_string="geoshapes")
md, self.user, self.project, id_string="geoshapes"
)
# publish submissions
self._publish_submit_geoms_in_repeats("Geoshapes")
view = DataViewSet.as_view({"get": "list"})
@@ -3194,7 +3239,7 @@ def test_floip_format(self):
floip_list = json.loads(response.content)
self.assertTrue(isinstance(floip_list, list))
floip_row = [x for x in floip_list if x[-2] == "none"][0]
self.assertEqual(floip_row[0], response.data[0]["_submission_time"] + "+00:00")
self.assertEqual(floip_row[0], response.data[0]["_submission_time"])
self.assertEqual(floip_row[2], "bob")
self.assertEqual(floip_row[3], response.data[0]["_uuid"])
self.assertEqual(
@@ -3361,24 +3406,27 @@ def test_data_list_xml_format(self):
"""Test DataViewSet list XML"""
# create submission
media_file = "1335783522563.jpg"
self._make_submission_w_attachment(
os.path.join(
self.this_directory,
"fixtures",
"transportation",
"instances",
"transport_2011-07-25_19-05-49_2",
"transport_2011-07-25_19-05-49_2.xml",
),
os.path.join(
self.this_directory,
"fixtures",
"transportation",
"instances",
"transport_2011-07-25_19-05-49_2",
media_file,
),
)
mocked_now = datetime.datetime(2023, 9, 20, 12, 49, 0, tzinfo=pytz.utc)

with patch("django.utils.timezone.now", Mock(return_value=mocked_now)):
self._make_submission_w_attachment(
os.path.join(
self.this_directory,
"fixtures",
"transportation",
"instances",
"transport_2011-07-25_19-05-49_2",
"transport_2011-07-25_19-05-49_2.xml",
),
os.path.join(
self.this_directory,
"fixtures",
"transportation",
"instances",
"transport_2011-07-25_19-05-49_2",
media_file,
),
)

view = DataViewSet.as_view({"get": "list"})
request = self.factory.get("/", **self.extra)
@@ -3394,7 +3442,7 @@
returned_xml = response.content.decode("utf-8")
server_time = ET.fromstring(returned_xml).attrib.get("serverTime")
edited = instance.last_edited is not None
submission_time = instance.date_created.strftime(MONGO_STRFTIME)
submission_time = instance.date_created.isoformat()
attachment = instance.attachments.first()
expected_xml = (
'<?xml version="1.0" encoding="utf-8"?>\n'