From 76f8408da5de6489d20b9b70d0dbd88744772690 Mon Sep 17 00:00:00 2001 From: Kelvin Muchiri Date: Fri, 24 May 2024 15:59:24 +0300 Subject: [PATCH] Optimize attachment xform migration (#2599) * convert Python code into a SQL query Run the migration as a SQL query to optimize performance when the number of records is huge * update reverse query for migration --- .../0014_populate_attachment_xform.py | 41 ++----------------- 1 file changed, 4 insertions(+), 37 deletions(-) diff --git a/onadata/apps/logger/migrations/0014_populate_attachment_xform.py b/onadata/apps/logger/migrations/0014_populate_attachment_xform.py index ba010bb481..7f34fc3f20 100644 --- a/onadata/apps/logger/migrations/0014_populate_attachment_xform.py +++ b/onadata/apps/logger/migrations/0014_populate_attachment_xform.py @@ -3,40 +3,6 @@ from django.db import migrations -def populate_attachment_xform(apps, schema_editor): - """Populate xform field for Attachments""" - Attachment = apps.get_model("logger", "Attachment") - queryset = Attachment.objects.filter(xform__isnull=True).values( - "pk", "instance__xform", "instance__user" - ) - count = queryset.count() - print("Start populating attachment xform...") - print(f"Found {count} records") - - for attachment in queryset.iterator(chunk_size=100): - # We do not want to trigger Model.save or any signal - # Queryset.update is a workaround to achieve this. 
- # Model.save and the post/pre signals may contain - # some side-effects which we are not interested in - Attachment.objects.filter(pk=attachment["pk"]).update( - xform=attachment["instance__xform"], - user=attachment["instance__user"], - ) - count -= 1 - print(f"{count} remaining") - - print("Done populating attachment xform!") - - -def reverse_populate_attachment_xform(apps, schema_editor): - """Reverse populate xform field when migrations are unapplied""" - Attachment = apps.get_model("logger", "Attachment") - queryset = Attachment.objects.filter(xform__isnull=False).values("pk") - - for attachment in queryset.iterator(chunk_size=100): - Attachment.objects.filter(pk=attachment["pk"]).update(xform=None, user=None) - - class Migration(migrations.Migration): dependencies = [ @@ -44,7 +10,8 @@ class Migration(migrations.Migration): ] operations = [ - migrations.RunPython( - populate_attachment_xform, reverse_populate_attachment_xform - ) + migrations.RunSQL( + sql="WITH logger_attachment_instance AS (SELECT logger_attachment.id, logger_instance.xform_id, logger_instance.user_id FROM logger_attachment INNER JOIN logger_instance ON logger_attachment.instance_id = logger_instance.id INNER JOIN logger_xform T4 ON logger_instance.xform_id = T4.id WHERE logger_attachment.xform_id IS NULL AND T4.deleted_at IS NULL) UPDATE logger_attachment SET xform_id = logger_attachment_instance.xform_id, user_id = logger_attachment_instance.user_id FROM logger_attachment_instance WHERE logger_attachment.id = logger_attachment_instance.id;", + reverse_sql="WITH logger_attachment_xform AS (SELECT logger_attachment.id FROM logger_attachment INNER JOIN logger_xform T4 ON logger_attachment.xform_id = T4.id WHERE logger_attachment.xform_id IS NOT NULL AND T4.deleted_at IS NULL) UPDATE logger_attachment SET xform_id = NULL, user_id = NULL FROM logger_attachment_xform WHERE logger_attachment.id = logger_attachment_xform.id;", + ), ]