From 356af533c62c8725f9b7585f5ad6b5cfd3af1809 Mon Sep 17 00:00:00 2001 From: Jeferson Moura Date: Mon, 3 Sep 2018 11:33:05 +0200 Subject: [PATCH] Add pagination to Elasticsearch results --- search/tests/test_views.py | 151 +++++++++++++++++++++++++++++++++++++ search/views.py | 65 ++++++++++------ 2 files changed, 194 insertions(+), 22 deletions(-) create mode 100644 search/tests/test_views.py diff --git a/search/tests/test_views.py b/search/tests/test_views.py new file mode 100644 index 000000000..f6632e2ab --- /dev/null +++ b/search/tests/test_views.py @@ -0,0 +1,151 @@ +import json +import logging +from unittest import skipIf + +from django.conf import settings +from django.test import TestCase, tag + +from rest_framework.test import APIRequestFactory + +import factories +from search import views +from search.utils import ElasticsearchIndexer +from workflow.models import Organization, WorkflowLevel1, ROLE_PROGRAM_ADMIN + + +@tag('pkg') +@skipIf(not settings.ELASTICSEARCH_URL, "Elasticsearch config not found") +class ElasticsearchSearchTest(TestCase): + indexer = None + + def setUp(self): + logging.disable(logging.ERROR) + settings.ELASTICSEARCH_ENABLED = True + self.org = Organization.objects.create( + organization_uuid="index-workflowlevel1-test") + self.indexer = ElasticsearchIndexer() + self.factory = APIRequestFactory() + self.tola_user = factories.TolaUser(organization=self.org) + + def tearDown(self): + logging.disable(logging.NOTSET) + settings.ELASTICSEARCH_ENABLED = False + + def test_es_connection(self): + self.assertTrue(self.indexer.es.ping(), + "Cannot connect to Elasticsearch. URL: {}".format( + settings.ELASTICSEARCH_URL)) + + def test_search_for_workflowlevel1(self): + wflvl1s = factories.WorkflowLevel1.create_batch( + 15, organization=self.org) + role_program_admin = factories.Group(name=ROLE_PROGRAM_ADMIN) + + for wflvl1 in wflvl1s: + factories.WorkflowTeam( + workflow_user=self.tola_user, + workflowlevel1=wflvl1, + role=role_program_admin + ) + + # get data from all the indexes + request = self.factory.get('') + request.user = self.tola_user.user + response = views.search(request, '_all', 'health') + content = json.loads(response.content) + + # we have data only for workflowlevel1 + self.assertTrue(len(content['workflowlevel1']) > 0) + self.assertTrue(len(content['workflowlevel2']) == 0) + self.assertTrue(len(content['indicators']) == 0) + self.assertTrue(len(content['collected_data']) == 0) + + wflvl1_1 = content['workflowlevel1'][0]['_source'] + search_after = content['workflowlevel1'][0]['sort'] + + # get more data using the cursor + request = self.factory.get('?search_after={},{}'.format( + search_after[0], search_after[1])) + request.user = self.tola_user.user + response = views.search(request, '_workflow_level1', 'health') + content = json.loads(response.content) + + self.assertTrue(len(content['workflowlevel1']) > 0) + + wflvl1_2 = content['workflowlevel1'][0]['_source'] + self.assertNotEqual(wflvl1_1['level1_uuid'], wflvl1_2['level1_uuid']) + + def test_search_for_workflowlevel2(self): + role_program_admin = factories.Group(name=ROLE_PROGRAM_ADMIN) + wflvl1 = factories.WorkflowLevel1(organization=self.org) + factories.WorkflowTeam( + workflow_user=self.tola_user, + workflowlevel1=wflvl1, + role=role_program_admin + ) + factories.WorkflowLevel2.create_batch(15, workflowlevel1=wflvl1) + + # get data from all the indexes + request = self.factory.get('') + request.user = self.tola_user.user + response = views.search(request, '_all', 'help') + content = json.loads(response.content) + + # we have data only for workflowlevel2 + self.assertTrue(len(content['workflowlevel1']) == 0) + self.assertTrue(len(content['workflowlevel2']) > 0) + self.assertTrue(len(content['indicators']) == 0) + self.assertTrue(len(content['collected_data']) == 0) + + wflvl2_1 = content['workflowlevel2'][0]['_source'] + search_after = content['workflowlevel2'][0]['sort'] + + # get more data using the cursor + request = self.factory.get('?search_after={},{}'.format( + search_after[0], search_after[1])) + request.user = self.tola_user.user + response = views.search(request, '_workflow_level2', 'help') + + content = json.loads(response.content) + self.assertTrue(len(content['workflowlevel2']) > 0) + + wflvl2_2 = content['workflowlevel2'][0]['_source'] + self.assertNotEqual(wflvl2_1['level2_uuid'], wflvl2_2['level2_uuid']) + + def test_search_for_indicator(self): + role_program_admin = factories.Group(name=ROLE_PROGRAM_ADMIN) + wflvl1 = factories.WorkflowLevel1(organization=self.org) + factories.WorkflowTeam( + workflow_user=self.tola_user, + workflowlevel1=wflvl1, + role=role_program_admin + ) + factories.Indicator.create_batch(15, workflowlevel1=[wflvl1]) + + # get data from all the indexes + request = self.factory.get('') + request.user = self.tola_user.user + response = views.search(request, '_all', 'building') + content = json.loads(response.content) + + # we have data only for indicator + self.assertTrue(len(content['workflowlevel1']) == 0) + self.assertTrue(len(content['workflowlevel2']) == 0) + self.assertTrue(len(content['indicators']) > 0) + self.assertTrue(len(content['collected_data']) == 0) + + indicator_1 = content['indicators'][0]['_source'] + search_after = content['indicators'][0]['sort'] + + # get more data using the cursor + request = self.factory.get('?search_after={},{}'.format( + search_after[0], search_after[1])) + request.user = self.tola_user.user + response = views.search(request, '_indicators', 'building') + + content = json.loads(response.content) + self.assertTrue(len(content['indicators']) > 0) + + indicator_2 = content['indicators'][0]['_source'] + self.assertNotEqual(indicator_1['indicator_uuid'], + indicator_2['indicator_uuid']) diff --git a/search/views.py b/search/views.py index 16905c4f5..a4e8514b2 100644 --- a/search/views.py +++ b/search/views.py @@ -1,12 +1,11 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals +import logging import json from django.conf import settings -from django.contrib.auth.decorators import login_required -from django.core.management import call_command from django.http import HttpResponse -from elasticsearch import Elasticsearch +from elasticsearch import Elasticsearch, exceptions from rest_framework.decorators import api_view from indicators.models import Indicator, CollectedData @@ -17,6 +16,7 @@ else: es = None +logger = logging.getLogger(__name__) """ @login_required(login_url='/accounts/login/') @@ -35,18 +35,20 @@ def search(request, index, term): else: prefix = '' - user_org_uuid = TolaUser.objects.get(user=request.user).organization.organization_uuid + user_org_uuid = TolaUser.objects.values_list( + 'organization__organization_uuid', flat=True).get( + user=request.user) prefix = prefix + str(user_org_uuid) + '_' index = index.lower().strip('_') # replace leading _ that _all cannot be accessed directly allowed_indices = ['workflow_level1', 'workflow_level2', 'indicators', 'collected_data'] if index.lower() == 'all': - index = prefix + 'workflow_level1,' + prefix + 'workflow_level2,' + prefix + 'indicators,' + prefix + 'collected_data' + index = ['{}{}'.format(prefix, i) for i in allowed_indices] elif not index in allowed_indices: raise Exception("Index not allowed to access") else: - index = prefix + index + index = [prefix + index] b = { "query": { @@ -67,21 +69,41 @@ def search(request, index, term): {"match": {"site.name": term}} ] } - } + }, + "sort": [ + {"create_date": "asc"}, + {"id": "desc"} + ] } - response = es.search(index=index, body=b) - results = {"workflowlevel1": [], "workflowlevel2": [], "indicators": [], "collected_data": []} - - # group result by type - for hit in response["hits"]["hits"]: - if "workflow_level1" in hit["_index"]: - results["workflowlevel1"].append(hit) - elif "workflow_level2" in hit["_index"]: - results["workflowlevel2"].append(hit) - elif "indicators" in hit["_index"]: - results["indicators"].append(hit) - elif "collected_data" in hit["_index"]: - results["collected_data"].append(hit) + + search_after = request.GET.get('search_after', None) + if search_after is not None: + search_after = search_after.split(',') + b['search_after'] = search_after + + results = { + "workflowlevel1": [], + "workflowlevel2": [], + "indicators": [], + "collected_data": [] + } + + for i in index: + try: + response = es.search(index=i, body=b) + except exceptions.NotFoundError as e: + logger.info(e) + else: + hits = response["hits"]["hits"] + if len(hits) > 0: + if "workflow_level1" in hits[0]["_index"]: + results["workflowlevel1"].extend(hits) + elif "workflow_level2" in hits[0]["_index"]: + results["workflowlevel2"].extend(hits) + elif "indicators" in hits[0]["_index"]: + results["indicators"].extend(hits) + elif "collected_data" in hits[0]["_index"]: + results["collected_data"].extend(hits) # check access if not request.user.is_superuser \ @@ -89,8 +111,7 @@ def search(request, index, term): 'name', flat=True): allowed_wf1s = WorkflowTeam.objects.filter( - workflow_user__user=request.user).values_list('workflowlevel1__id', - flat=True) + workflow_user__user=request.user).values_list('workflowlevel1__id',flat=True) wf1_results = [] for wf1 in results["workflowlevel1"]: