From e1992c0b9a7922cc5f03d2c93bb287896e267557 Mon Sep 17 00:00:00 2001 From: Tiago Geada Date: Thu, 31 Oct 2024 16:21:15 +0000 Subject: [PATCH] Revert "(fix): refactored scryfall model" --- src/mtg/migrations/0001_scryfall_data.py | 8 ++- src/mtg/models.py | 26 +++---- src/mtg/services.py | 44 ++++-------- src/mtg/tasks.py | 87 +++++++----------------- 4 files changed, 53 insertions(+), 112 deletions(-) diff --git a/src/mtg/migrations/0001_scryfall_data.py b/src/mtg/migrations/0001_scryfall_data.py index a7f8162..4065dab 100644 --- a/src/mtg/migrations/0001_scryfall_data.py +++ b/src/mtg/migrations/0001_scryfall_data.py @@ -1,4 +1,4 @@ -# Generated by Django 5.1.2 on 2024-10-31 15:37 +# Generated by Django 5.1.2 on 2024-10-29 17:34 from django.db import migrations, models @@ -23,8 +23,10 @@ class Migration(migrations.Migration): ), ("obs", models.TextField(blank=True, verbose_name="Observations")), ("active", models.BooleanField(default=True, verbose_name="active")), - ("id", models.UUIDField(primary_key=True, serialize=False)), - ("cardmarket_id", models.PositiveIntegerField()), + ( + "cardmarket_id", + models.PositiveIntegerField(primary_key=True, serialize=False), + ), ("oracle_id", models.CharField(max_length=128, null=True)), ("name", models.CharField(max_length=256, null=True)), ("mana_cost", models.CharField(blank=True, max_length=128, null=True)), diff --git a/src/mtg/models.py b/src/mtg/models.py index e10e8c7..dbd5edd 100644 --- a/src/mtg/models.py +++ b/src/mtg/models.py @@ -2,23 +2,23 @@ from lib.models import BaseAbstractModel -# class ScryfallCardManager(models.Manager): -# # Taken from https://github.com/baronvonvaderham/django-mtg-card-catalog -# -# def get_or_create_card(self, card_data): -# """Fetch or create a card based on the provided data dictionary.""" -# card, created = self.update_or_create( -# id=card_data["id"], -# defaults=card_data, -# ) -# return created, card + +class ScryfallCardManager(models.Manager): + # Taken from https://github.com/baronvonvaderham/django-mtg-card-catalog + + def get_or_create_card(self, card_data): + """Fetch or create a card based on the provided data dictionary.""" + card, created = self.update_or_create( + cardmarket_id=card_data["cardmarket_id"], + defaults=card_data, + ) + return created, card class ScryfallCard(BaseAbstractModel): """Class to contain a local version of the scryfall data to limit the need for external API calls.""" - id = models.UUIDField(primary_key=True, editable=True) - cardmarket_id = models.PositiveIntegerField() + cardmarket_id = models.PositiveIntegerField(blank=False, primary_key=True) oracle_id = models.CharField(max_length=128, null=True) # NOQA nosemgrep name = models.CharField(max_length=256, null=True) # nosemgrep mana_cost = models.CharField(max_length=128, blank=True, null=True) # NOQA nosemgrep @@ -33,7 +33,7 @@ class ScryfallCard(BaseAbstractModel): image_small = models.URLField(blank=True, null=True) # NOQA nosemgrep image_normal = models.URLField(blank=True, null=True) # NOQA nosemgrep - # objects = ScryfallCardManager() + objects = ScryfallCardManager() class Meta: indexes = [models.Index(fields=['cardmarket_id'], name='idx_scryfallcard_cm_id')] diff --git a/src/mtg/services.py b/src/mtg/services.py index 7e153bd..2f960e1 100644 --- a/src/mtg/services.py +++ b/src/mtg/services.py @@ -1,8 +1,6 @@ -import json import unicodedata import requests -from tqdm.auto import tqdm from .constants import BASIC_TYPES, SCRYFALL_BULK_DATA_URL from .models import ScryfallCard @@ -27,7 +25,7 @@ def process_card_types(card_data): continue # If there is a ' - ', that means we have subtypes to the right, supertypes to the left - if ' - ' in type_line: + if ' — ' in type_line: main_types, subtypes = type_line.split(' - ') else: main_types, subtypes = type_line, None @@ -47,23 +45,12 @@ def scryfall_download_bulk_data(): response = requests.get(SCRYFALL_BULK_DATA_URL, timeout=10) response.raise_for_status() # Raise an error for bad responses url = response.json() - - # Find bulk data url url = next(item for item in url['data'] if item['type'] == 'default_cards') url = url['download_uri'] - # Download in chunks - response = requests.get(url, timeout=10, stream=True) + response = requests.get(url, timeout=10) response.raise_for_status() - - total_size = int(response.headers.get('Content-Length', 0)) if 'Content-Length' in response.headers else None - with tqdm(total=total_size, unit='B', unit_scale=True, desc='Downloading') as progress_bar: - json_data = [] - for chunk in response.iter_content(chunk_size=8192): - json_data.append(chunk) - progress_bar.update(len(chunk)) - - return json.loads(b''.join(json_data)) + return response.json() def scryfall_process_data(data): @@ -78,19 +65,13 @@ def scryfall_transform_card_data(raw_card_data): """Convert raw Scryfall data to model-compatible format, applying constants-based filters and transformations.""" # Skipping unwanted stuff - skipping_ids = {'90f17b85-a866-48e8-aae0-55330109550e'} if not raw_card_data.get('cardmarket_id'): return None if raw_card_data.get('name').split(' ')[0] in BASIC_TYPES: return None if '(' in raw_card_data.get('name'): return None - if raw_card_data.get('id') in skipping_ids: - return None - scryfall_id = raw_card_data.get('id') - oracle_id = raw_card_data.get('oracle_id') - cardmarket_id = raw_card_data.get('cardmarket_id') card_name = raw_card_data.get('name', '') card_name = ''.join(c for c in unicodedata.normalize('NFD', card_name) if unicodedata.category(c) != 'Mn') card_types, card_subtypes = process_card_types(raw_card_data) @@ -101,7 +82,7 @@ def scryfall_transform_card_data(raw_card_data): image_small = None image_normal = None color_identity = raw_card_data.get('color_identity') - cmc = raw_card_data.get('cmc') + cardmarket_id = raw_card_data.get('cardmarket_id') # Split cards if ' // ' in card_name: @@ -134,16 +115,15 @@ def scryfall_transform_card_data(raw_card_data): image_normal = image_uris.get('image_normal' if 'image_uris' in raw_card_data else 'normal', None) transformed_data = { - 'id': scryfall_id, - 'oracle_id': oracle_id, + 'oracle_id': raw_card_data.get('oracle_id'), 'name': card_name, - 'mana_cost': json.dumps(mana_cost), - 'cmc': cmc, - 'types': json.dumps(card_types), - 'subtypes': json.dumps(card_subtypes), - 'colors': json.dumps(list(colors)), - 'color_identity': json.dumps(color_identity), - 'oracle_text': json.dumps(oracle_text), + 'mana_cost': mana_cost, + 'cmc': raw_card_data.get('cmc'), + 'types': card_types, + 'subtypes': card_subtypes, + 'colors': list(colors), + 'color_identity': color_identity, + 'oracle_text': oracle_text, 'cardmarket_id': cardmarket_id, 'image_small': image_small, 'image_normal': image_normal, diff --git a/src/mtg/tasks.py b/src/mtg/tasks.py index 722d747..f90cedd 100644 --- a/src/mtg/tasks.py +++ b/src/mtg/tasks.py @@ -1,82 +1,41 @@ +from celery import group from celery.utils.log import get_task_logger -from django.utils import timezone from tqdm.auto import tqdm from cm_prices.celery import app from mtg.models import ScryfallCard -from mtg.services import scryfall_download_bulk_data, scryfall_transform_card_data +from mtg.services import ( + scryfall_download_bulk_data, + scryfall_save_card, + scryfall_transform_card_data, +) logger = get_task_logger('tasks.common') @app.task(name='sync_scryfall_task') def sync_scryfall(*args, **kwargs): - """Run Scryfall update bulk task.""" - logger.info('BEGINNING SCRYFALL SYNC TASK') + """Run scryfall update bulk task.""" + logger.info('BEGINNING SCRYFALL SYNC TASK') scryfall_data = scryfall_download_bulk_data() - new_cards = [] - existing_cards = [] - existing_card_ids = set(str(card_id) for card_id in ScryfallCard.objects.values_list('id', flat=True)) - + if kwargs.get('test'): + scryfall_data = scryfall_data[:2] + load_tasks = [] for raw_card_data in tqdm(scryfall_data, unit='card'): - card_data = scryfall_transform_card_data(raw_card_data) - if card_data: - # Check if the card already exists by cardmarket_id - if card_data['id'] in existing_card_ids: - existing_cards.append(ScryfallCard(**card_data)) - else: - timestamp = timezone.now() - card_data['date_updated'] = timestamp - card_data['date_created'] = timestamp - new_cards.append(ScryfallCard(**card_data)) - - # Field list for bulk_update - fields_to_update = [ - 'oracle_id', - 'name', - 'mana_cost', - 'cmc', - 'types', - 'subtypes', - 'colors', - 'color_identity', - 'oracle_text', - 'image_small', - 'image_normal', - 'legalities', - 'cardmarket_id', - ] - - # Bulk create and update - if new_cards: - ScryfallCard.objects.bulk_create(new_cards) - logger.info('%d new cards inserted.', len(new_cards)) - if existing_cards: - bulk_update_if_changed(existing_cards, fields_to_update) - + card = scryfall_transform_card_data(raw_card_data) + if card: + if not ScryfallCard.objects.filter(cardmarket_id=card.get('cardmarket_id')).exists(): + load_tasks.append(get_or_create_scryfall_card.s(card)) + task_group = group(load_tasks) + task_group.apply() logger.info('SCRYFALL SYNC TASK COMPLETE!') -def bulk_update_if_changed(update_cards, fields): - """Bulk update only cards that are different.""" - # Create a mapping of cardmarket_id to existing card data - scryfall_ids = [card.id for card in update_cards] - existing_cards = {str(card.id): card for card in ScryfallCard.objects.filter(id__in=scryfall_ids)} - - cards_to_update = [] - - for update_card in update_cards: - existing_card = existing_cards.get(update_card.id) - # Compare fields to see if there are changes - has_changes = any(getattr(existing_card, field) != getattr(update_card, field) for field in fields) - - if has_changes: - update_card.date_updated = timezone.now() - cards_to_update.append(update_card) +@app.task(name='get_or_create_scryfall_card') +def get_or_create_scryfall_card(card_data): + """Create card in local scryfall model.""" - # Perform the bulk update only if there are changes - if cards_to_update: - update_fields = fields + ['date_updated'] - ScryfallCard.objects.bulk_update(cards_to_update, update_fields) - logger.info('Updated %d cards.', len(cards_to_update)) + created, card = scryfall_save_card(card_data) + if created: + logger.info('Created new Scryfall card: %s', card.name)