Skip to content

Commit

Permalink
(fix): refactored scryfall model
Browse files Browse the repository at this point in the history
  • Loading branch information
cusco committed Oct 31, 2024
1 parent 09b5ada commit 5bfaaad
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 53 deletions.
8 changes: 3 additions & 5 deletions src/mtg/migrations/0001_scryfall_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Django 5.1.2 on 2024-10-29 17:34
# Generated by Django 5.1.2 on 2024-10-31 15:37

from django.db import migrations, models

Expand All @@ -23,10 +23,8 @@ class Migration(migrations.Migration):
),
("obs", models.TextField(blank=True, verbose_name="Observations")),
("active", models.BooleanField(default=True, verbose_name="active")),
(
"cardmarket_id",
models.PositiveIntegerField(primary_key=True, serialize=False),
),
("id", models.UUIDField(primary_key=True, serialize=False)),
("cardmarket_id", models.PositiveIntegerField()),
("oracle_id", models.CharField(max_length=128, null=True)),
("name", models.CharField(max_length=256, null=True)),
("mana_cost", models.CharField(blank=True, max_length=128, null=True)),
Expand Down
26 changes: 13 additions & 13 deletions src/mtg/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,23 @@

from lib.models import BaseAbstractModel


class ScryfallCardManager(models.Manager):
# Taken from https://github.com/baronvonvaderham/django-mtg-card-catalog

def get_or_create_card(self, card_data):
"""Fetch or create a card based on the provided data dictionary."""
card, created = self.update_or_create(
cardmarket_id=card_data["cardmarket_id"],
defaults=card_data,
)
return created, card
# class ScryfallCardManager(models.Manager):
# # Taken from https://github.com/baronvonvaderham/django-mtg-card-catalog
#
# def get_or_create_card(self, card_data):
# """Fetch or create a card based on the provided data dictionary."""
# card, created = self.update_or_create(
# id=card_data["id"],
# defaults=card_data,
# )
# return created, card


class ScryfallCard(BaseAbstractModel):
"""Class to contain a local version of the scryfall data to limit the need for external API calls."""

cardmarket_id = models.PositiveIntegerField(blank=False, primary_key=True)
id = models.UUIDField(primary_key=True, editable=True)
cardmarket_id = models.PositiveIntegerField()
oracle_id = models.CharField(max_length=128, null=True) # NOQA nosemgrep
name = models.CharField(max_length=256, null=True) # nosemgrep
mana_cost = models.CharField(max_length=128, blank=True, null=True) # NOQA nosemgrep
Expand All @@ -33,7 +33,7 @@ class ScryfallCard(BaseAbstractModel):
image_small = models.URLField(blank=True, null=True) # NOQA nosemgrep
image_normal = models.URLField(blank=True, null=True) # NOQA nosemgrep

objects = ScryfallCardManager()
# objects = ScryfallCardManager()

class Meta:
indexes = [models.Index(fields=['cardmarket_id'], name='idx_scryfallcard_cm_id')]
Expand Down
44 changes: 32 additions & 12 deletions src/mtg/services.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
import unicodedata

import requests
from tqdm.auto import tqdm

from .constants import BASIC_TYPES, SCRYFALL_BULK_DATA_URL
from .models import ScryfallCard
Expand All @@ -25,7 +27,7 @@ def process_card_types(card_data):
continue

# If there is a ' - ', that means we have subtypes to the right, supertypes to the left
if ' ' in type_line:
if ' - ' in type_line:
main_types, subtypes = type_line.split(' - ')
else:
main_types, subtypes = type_line, None
Expand All @@ -45,12 +47,23 @@ def scryfall_download_bulk_data():
response = requests.get(SCRYFALL_BULK_DATA_URL, timeout=10)
response.raise_for_status() # Raise an error for bad responses
url = response.json()

# Find bulk data url
url = next(item for item in url['data'] if item['type'] == 'default_cards')
url = url['download_uri']

response = requests.get(url, timeout=10)
# Download in chunks
response = requests.get(url, timeout=10, stream=True)
response.raise_for_status()
return response.json()

total_size = int(response.headers.get('Content-Length', 0)) if 'Content-Length' in response.headers else None
with tqdm(total=total_size, unit='B', unit_scale=True, desc='Downloading') as progress_bar:
json_data = []
for chunk in response.iter_content(chunk_size=8192):
json_data.append(chunk)
progress_bar.update(len(chunk))

return json.loads(b''.join(json_data))


def scryfall_process_data(data):
Expand All @@ -65,13 +78,19 @@ def scryfall_transform_card_data(raw_card_data):
"""Convert raw Scryfall data to model-compatible format, applying constants-based filters and transformations."""

# Skipping unwanted stuff
skipping_ids = {'90f17b85-a866-48e8-aae0-55330109550e'}
if not raw_card_data.get('cardmarket_id'):
return None
if raw_card_data.get('name').split(' ')[0] in BASIC_TYPES:
return None
if '(' in raw_card_data.get('name'):
return None
if raw_card_data.get('id') in skipping_ids:
return None

scryfall_id = raw_card_data.get('id')
oracle_id = raw_card_data.get('oracle_id')
cardmarket_id = raw_card_data.get('cardmarket_id')
card_name = raw_card_data.get('name', '')
card_name = ''.join(c for c in unicodedata.normalize('NFD', card_name) if unicodedata.category(c) != 'Mn')
card_types, card_subtypes = process_card_types(raw_card_data)
Expand All @@ -82,7 +101,7 @@ def scryfall_transform_card_data(raw_card_data):
image_small = None
image_normal = None
color_identity = raw_card_data.get('color_identity')
cardmarket_id = raw_card_data.get('cardmarket_id')
cmc = raw_card_data.get('cmc')

# Split cards
if ' // ' in card_name:
Expand Down Expand Up @@ -115,15 +134,16 @@ def scryfall_transform_card_data(raw_card_data):
image_normal = image_uris.get('image_normal' if 'image_uris' in raw_card_data else 'normal', None)

transformed_data = {
'oracle_id': raw_card_data.get('oracle_id'),
'id': scryfall_id,
'oracle_id': oracle_id,
'name': card_name,
'mana_cost': mana_cost,
'cmc': raw_card_data.get('cmc'),
'types': card_types,
'subtypes': card_subtypes,
'colors': list(colors),
'color_identity': color_identity,
'oracle_text': oracle_text,
'mana_cost': json.dumps(mana_cost),
'cmc': cmc,
'types': json.dumps(card_types),
'subtypes': json.dumps(card_subtypes),
'colors': json.dumps(list(colors)),
'color_identity': json.dumps(color_identity),
'oracle_text': json.dumps(oracle_text),
'cardmarket_id': cardmarket_id,
'image_small': image_small,
'image_normal': image_normal,
Expand Down
87 changes: 64 additions & 23 deletions src/mtg/tasks.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,82 @@
from celery import group
from celery.utils.log import get_task_logger
from django.utils import timezone
from tqdm.auto import tqdm

from cm_prices.celery import app
from mtg.models import ScryfallCard
from mtg.services import (
scryfall_download_bulk_data,
scryfall_save_card,
scryfall_transform_card_data,
)
from mtg.services import scryfall_download_bulk_data, scryfall_transform_card_data

logger = get_task_logger('tasks.common')


@app.task(name='sync_scryfall_task')
def sync_scryfall(*args, **kwargs):
"""Run scryfall update bulk task."""

"""Run Scryfall update bulk task."""
logger.info('BEGINNING SCRYFALL SYNC TASK')

scryfall_data = scryfall_download_bulk_data()
if kwargs.get('test'):
scryfall_data = scryfall_data[:2]
load_tasks = []
new_cards = []
existing_cards = []
existing_card_ids = set(str(card_id) for card_id in ScryfallCard.objects.values_list('id', flat=True))

for raw_card_data in tqdm(scryfall_data, unit='card'):
card = scryfall_transform_card_data(raw_card_data)
if card:
if not ScryfallCard.objects.filter(cardmarket_id=card.get('cardmarket_id')).exists():
load_tasks.append(get_or_create_scryfall_card.s(card))
task_group = group(load_tasks)
task_group.apply()
card_data = scryfall_transform_card_data(raw_card_data)
if card_data:
# Check if the card already exists by its Scryfall id (the model's primary key)
if card_data['id'] in existing_card_ids:
existing_cards.append(ScryfallCard(**card_data))
else:
timestamp = timezone.now()
card_data['date_updated'] = timestamp
card_data['date_created'] = timestamp
new_cards.append(ScryfallCard(**card_data))

# Field list for bulk_update
fields_to_update = [
'oracle_id',
'name',
'mana_cost',
'cmc',
'types',
'subtypes',
'colors',
'color_identity',
'oracle_text',
'image_small',
'image_normal',
'legalities',
'cardmarket_id',
]

# Bulk create and update
if new_cards:
ScryfallCard.objects.bulk_create(new_cards)
logger.info('%d new cards inserted.', len(new_cards))
if existing_cards:
bulk_update_if_changed(existing_cards, fields_to_update)

logger.info('SCRYFALL SYNC TASK COMPLETE!')


@app.task(name='get_or_create_scryfall_card')
def get_or_create_scryfall_card(card_data):
"""Create card in local scryfall model."""
def bulk_update_if_changed(update_cards, fields):
"""Bulk update only cards that are different."""
# Create a mapping of Scryfall id (as a string) to the existing card row
scryfall_ids = [card.id for card in update_cards]
existing_cards = {str(card.id): card for card in ScryfallCard.objects.filter(id__in=scryfall_ids)}

cards_to_update = []

for update_card in update_cards:
existing_card = existing_cards.get(update_card.id)
# Compare fields to see if there are changes
has_changes = any(getattr(existing_card, field) != getattr(update_card, field) for field in fields)

if has_changes:
update_card.date_updated = timezone.now()
cards_to_update.append(update_card)

created, card = scryfall_save_card(card_data)
if created:
logger.info('Created new Scryfall card: %s', card.name)
# Perform the bulk update only if there are changes
if cards_to_update:
update_fields = fields + ['date_updated']
ScryfallCard.objects.bulk_update(cards_to_update, update_fields)
logger.info('Updated %d cards.', len(cards_to_update))

0 comments on commit 5bfaaad

Please sign in to comment.