Skip to content

Commit

Permalink
Add support for OR operation data filter for date fields (#2704)
Browse files Browse the repository at this point in the history
* add support for querying "or" data by meta data fields

* add test

* add test

* remove extra underscore

* add tests

* add test

* refactor code

* add support for ISO date format when filtering data

* refactor code

* refactor code

* refactor code

* update documentation

* add support for date format %Y-%m-%dT%H:%M:%S%z data endpoint filter

* remove unnecessary parsing for dates

* revert modified test

* update docs

* update docs

* update docs

* update docs

* update tests

* remove unsupported example from docs

* remove tests for unsupported operation

* handle invalid date formats when filtering data

* rename tests

* update doc string

* remove commented code

* update tag for ona-oidc

* enhance error message

* fix import lint error

fix wrong-import-order / standard import "from builtins import str as text" should be placed before "import six"

* fix lint error

fix wrong-import-order / third party import "import six" should be placed before "from onadata.libs.utils.common_tags import KNOWN_DATE_FORMATS"

* fix indentation

* enhance error message

* fix lint error line-too-long
  • Loading branch information
kelvin-muchiri authored Sep 19, 2024
1 parent 2b26f35 commit b5cde55
Show file tree
Hide file tree
Showing 9 changed files with 186 additions and 11 deletions.
18 changes: 18 additions & 0 deletions docs/data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,15 @@ Query submitted data of a specific form
----------------------------------------
Use the `query` or `data` parameter to pass in a JSON key/value query.

ISO 8601 date formats are supported. Below are examples of common formats:

- ``YYYY-MM-DD`` (e.g., 2024-09-18)
- ``YYYY-MM-DDThh:mm:ss`` (e.g., 2024-09-18T14:30:00)
- ``YYYY-MM-DDThh:mm:ssZ`` (e.g., 2024-09-18T14:30:00Z)
- ``YYYY-MM-DDThh:mm:ss.ssssssZ`` (e.g., 2024-09-18T14:30:00.169615Z)
- ``YYYY-MM-DDThh:mm:ss±hh:mm`` (e.g., 2024-09-17T13:39:40+00:00)
- ``YYYY-MM-DDThh:mm:ss.ssssss±hh:mm`` (e.g., 2024-09-17T13:39:40.169615+00:00)

When querying a date time field whose value is in ISO format such as ``2020-12-18T09:36:19.767455+00:00``, it is important to ensure the ``+`` (plus) is encoded to ``%2b``.

``+`` without encoding is parsed as whitespace. So ``2020-12-18T09:36:19.767455+00:00`` should be converted to ``2020-12-18T09:36:19.767455%2b00:00``.
Expand Down Expand Up @@ -672,6 +681,15 @@ Query submissions with `NULL` submission review status

curl -X GET https://api.ona.io/api/v1/data/22845?query={"_review_status": null}

Example XIII
^^^^^^^^^^^^

Query submissions collected within specific dates or edited within specific dates.

::

curl -X GET https://api.ona.io/api/v1/data/22845?query={"$or": [{"_submission_time":{"$gte": "2020-01-01", "$lte": "2020-08-31"}}, {"_last_edited":{"$gte": "2020-01-01", "$lte": "2020-08-31"}}]}

All Filters Options

Expand Down
73 changes: 73 additions & 0 deletions onadata/apps/api/tests/viewsets/test_data_viewset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3513,6 +3513,79 @@ def test_data_query_ornull(self):
self.assertEqual(response.status_code, 200)
self.assertEqual(len(response.data), 4)

def test_or_with_date_filters(self):
    """Date fields can be combined under a single ``$or`` query filter."""
    list_view = DataViewSet.as_view({"get": "list"})
    submission_xml = '<data id="b"><fruit>mango</fruit></data>'

    def utc(year, month, day):
        """Build a timezone-aware UTC datetime at midnight."""
        return datetime.datetime(year, month, day, tzinfo=timezone.utc)

    # Each pair is (mocked "now" at creation, explicit last_edited or None).
    # Patching timezone.now is how the test controls date_created
    # (exposed to queries as _submission_time).
    fixtures = (
        (utc(2024, 9, 16), None),
        (utc(2024, 9, 18), None),
        (utc(2022, 4, 1), utc(2023, 4, 1)),
        (utc(2022, 4, 1), utc(2023, 5, 1)),
    )
    for mocked_now, edited_on in fixtures:
        optional_fields = {} if edited_on is None else {"last_edited": edited_on}
        with patch("django.utils.timezone.now", Mock(return_value=mocked_now)):
            Instance.objects.create(
                xform=self.xform,
                xml=submission_xml,
                **optional_fields,
            )

    # Each pair is (raw JSON query, number of submissions expected back).
    cases = (
        (
            '{"$or": [{"_submission_time":{"$gte": "2024-09-16", "$lte": "2024-09-18"}}, '
            '{"_last_edited":{"$gte": "2023-04-01", "$lte": "2023-05-01"}}]}',
            4,
        ),
        (
            '{"$or": [{"_submission_time":{"$gte": "2024-09-16"}}, '
            '{"_last_edited":{"$gte": "2023-05-01"}}]}',
            3,
        ),
    )
    for query_str, expected_count in cases:
        request = self.factory.get("/?query=%s" % query_str, **self.extra)
        response = list_view(request, pk=self.xform.pk)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(response.data), expected_count)

def test_invalid_date_filters(self):
    """A non-date value on a date field returns 400 with a descriptive error."""
    list_view = DataViewSet.as_view({"get": "list"})

    for field_name in ("_submission_time", "_date_modified", "_last_edited"):
        query_str = '{"%s": {"$lte": "watermelon"}}' % field_name
        response = list_view(
            self.factory.get("/?query=%s" % query_str, **self.extra),
            pk=self.xform.pk,
        )
        expected_detail = (
            f'Invalid date value "watermelon" for the field {field_name}.'
        )

        self.assertEqual(response.status_code, 400)
        # The f-string forces the (possibly lazy) detail into a plain str.
        self.assertEqual(f"{response.data['detail']}", expected_detail)

def test_data_list_xml_format(self):
"""Test DataViewSet list XML"""
# create submission
Expand Down
41 changes: 35 additions & 6 deletions onadata/apps/viewer/parsed_instance_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@
import json
from builtins import str as text
from typing import Any, Tuple

import six

from onadata.libs.utils.common_tags import DATE_FORMAT, MONGO_STRFTIME
from django.utils.translation import gettext_lazy as _

from onadata.libs.utils.common_tags import KNOWN_DATE_FORMATS
from onadata.libs.exceptions import InavlidDateFormat

KNOWN_DATES = ["_submission_time"]

KNOWN_DATES = ["_submission_time", "_last_edited", "_date_modified"]
NONE_JSON_FIELDS = {
"_submission_time": "date_created",
"_date_modified": "date_modified",
Expand Down Expand Up @@ -62,11 +65,23 @@ def _parse_where(query, known_integers, known_decimals, or_where, or_params):
_v = value
if field_key in KNOWN_DATES:
raw_date = value
for date_format in (MONGO_STRFTIME, DATE_FORMAT):
is_date_valid = False
for date_format in KNOWN_DATE_FORMATS:
try:
_v = datetime.datetime.strptime(raw_date[:19], date_format)
_v = datetime.datetime.strptime(raw_date, date_format)
except ValueError:
pass
is_date_valid = False
else:
is_date_valid = True
break

if not is_date_valid:
err_msg = _(
f'Invalid date value "{value}" '
f"for the field {field_key}."
)
raise InavlidDateFormat(err_msg)

if field_key in NONE_JSON_FIELDS:
where_params.extend([text(_v)])
else:
Expand Down Expand Up @@ -131,6 +146,20 @@ def get_where_clause(query, form_integer_fields=None, form_decimal_fields=None):

for or_query in or_dict:
for key, value in or_query.items():
if key in NONE_JSON_FIELDS:
and_query_where, and_query_where_params = _parse_where(
or_query,
known_integers,
known_decimals,
[],
[],
)
or_where.extend(
["".join(["(", " AND ".join(and_query_where), ")"])]
)
or_params.extend(and_query_where_params)
continue

if value is None:
or_where.extend([f"json->>'{key}' IS NULL"])
elif isinstance(value, list):
Expand Down
45 changes: 44 additions & 1 deletion onadata/apps/viewer/tests/test_parsed_instance.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os

from datetime import datetime

from rest_framework.exceptions import ParseError

from onadata.apps.logger.models.instance import Instance
from onadata.apps.main.models.user_profile import UserProfile
from onadata.apps.main.tests.test_base import TestBase
Expand Down Expand Up @@ -90,6 +92,47 @@ def test_get_where_clause_with_integer(self):
self.assertEqual(where, ["json::text ~* cast(%s as text)"])
self.assertEqual(where_params, [11])

def test_get_where_clause_or_date_range(self):
    """``$or`` branches on date fields become AND-ed ranges joined by OR."""
    query = (
        '{"$or": [{"_submission_time":{"$gte": "2024-09-17T13:39:40.001694+00:00", '
        '"$lte": "2024-09-17T13:39:40.001694+00:00"}}, '
        '{"_last_edited":{"$gte": "2024-04-01T13:39:40.001694+00:00", '
        '"$lte": "2024-04-01T13:39:40.001694+00:00"}}, '
        '{"_date_modified":{"$gte": "2024-04-01T13:39:40.001694+00:00", '
        '"$lte": "2024-04-01T13:39:40.001694+00:00"}}]}'
    )
    # One parenthesised range per $or branch, keyed on the model column.
    expected_where = [
        "((date_created >= %s AND date_created <= %s) OR "
        "(last_edited >= %s AND last_edited <= %s) OR "
        "(date_modified >= %s AND date_modified <= %s))"
    ]
    # Parameters come back as stringified datetimes (space instead of "T"),
    # in the same order as the placeholders above.
    submission_bound = "2024-09-17 13:39:40.001694+00:00"
    edit_bound = "2024-04-01 13:39:40.001694+00:00"
    expected_params = [submission_bound] * 2 + [edit_bound] * 4

    where, where_params = get_where_clause(query)

    self.assertEqual(where, expected_where)
    self.assertEqual(where_params, expected_params)

def test_invalid_date_format(self):
    """Invalid date values for known date fields raise ``ParseError``."""
    for date_field in ("_submission_time", "_date_modified", "_last_edited"):
        bad_query = {date_field: {"$lte": "watermelon"}}
        with self.assertRaises(ParseError):
            get_where_clause(bad_query)

def test_retrieve_records_based_on_form_verion(self):
self._create_user_and_login()
self._publish_transportation_form()
Expand Down
8 changes: 7 additions & 1 deletion onadata/libs/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""Custom Exception classes."""
from django.utils.translation import gettext_lazy as _

from rest_framework.exceptions import APIException
from rest_framework.exceptions import APIException, ParseError


class EnketoError(Exception):
Expand Down Expand Up @@ -35,3 +35,9 @@ class ServiceUnavailable(APIException):

status_code = 503
default_detail = "Service temporarily unavailable, try again later."


class InvalidDateFormat(ParseError):
    """Raised when a request query contains an invalid date value.

    Subclasses DRF's ``ParseError`` so the API answers with that
    exception's error response instead of an unhandled server error.
    """

    default_detail = _("Invalid date format.")


# Backward-compatible alias: the class was originally published under this
# misspelled name, and existing imports / ``except`` clauses rely on it.
InavlidDateFormat = InvalidDateFormat
6 changes: 6 additions & 0 deletions onadata/libs/utils/common_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@
# datetime format that we store in mongo
MONGO_STRFTIME = "%Y-%m-%dT%H:%M:%S"
DATE_FORMAT = "%Y-%m-%d"
# strptime formats accepted when filtering data by date fields.
KNOWN_DATE_FORMATS = [
    # Date only, e.g. 2024-09-18
    DATE_FORMAT,
    # Date and time without timezone, e.g. 2024-09-18T14:30:00
    MONGO_STRFTIME,
    # Date and time with UTC offset, e.g. 2024-09-17T13:39:40+00:00
    "%Y-%m-%dT%H:%M:%S%z",
    # Same, with fractional seconds, e.g. 2024-09-17T13:39:40.169615+00:00
    "%Y-%m-%dT%H:%M:%S.%f%z",
]

# how to represent N/A in exports
NA_REP = "n/a"
Expand Down
2 changes: 1 addition & 1 deletion requirements/base.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ git+https://github.com/onaio/django-digest.git@6bf61ec08502fd3545d4f2c0838b6cb15
git+https://github.com/onaio/django-multidb-router.git@f711368180d58eef87eda54fadfd5f8355623d52#egg=django-multidb-router
git+https://github.com/onaio/floip-py.git@3c980eb184069ae7c3c9136b18441978237cd41d#egg=pyfloip
git+https://github.com/onaio/python-json2xlsclient.git@62b4645f7b4f2684421a13ce98da0331a9dd66a0#egg=python-json2xlsclient
git+https://github.com/onaio/ona-oidc.git@pytz-deprecated#egg=ona-oidc
git+https://github.com/onaio/ona-oidc.git@v1.0.4#egg=ona-oidc
-e git+https://github.com/onaio/savreaderwriter.git@fix-pep-440-issues#egg=savreaderwriter
2 changes: 1 addition & 1 deletion requirements/base.pip
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ oauthlib==3.2.2
# via
# django-oauth-toolkit
# requests-oauthlib
ona-oidc @ git+https://github.com/onaio/ona-oidc.git@pytz-deprecated
ona-oidc @ git+https://github.com/onaio/ona-oidc.git@v1.0.4
# via -r requirements/base.in
openpyxl==3.1.2
# via
Expand Down
2 changes: 1 addition & 1 deletion requirements/dev.pip
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ oauthlib==3.2.2
# via
# django-oauth-toolkit
# requests-oauthlib
ona-oidc @ git+https://github.com/onaio/ona-oidc.git@pytz-deprecated
ona-oidc @ git+https://github.com/onaio/ona-oidc.git@v1.0.4
# via -r requirements/base.in
openpyxl==3.1.2
# via
Expand Down

0 comments on commit b5cde55

Please sign in to comment.