From e040384519a7030d17227395ff5ed4cbfb46cb5f Mon Sep 17 00:00:00 2001 From: Brian Pepple Date: Tue, 7 Nov 2017 16:32:46 -0500 Subject: [PATCH 1/3] pep8 models.py to fix formatting. --- comics/models.py | 256 ++++++++++++++++++++++++++--------------------- 1 file changed, 140 insertions(+), 116 deletions(-) diff --git a/comics/models.py b/comics/models.py index ee67da5..ba56d4e 100644 --- a/comics/models.py +++ b/comics/models.py @@ -1,6 +1,10 @@ from __future__ import unicode_literals -import datetime,os,rarfile,zipfile,tarfile +import datetime +import os +import rarfile +import zipfile +import tarfile from shutil import copyfile from multiselectfield import MultiSelectField @@ -17,156 +21,176 @@ Choices for model use. """ # Generated years -YEAR_CHOICES = [(r,r) for r in range(1837, datetime.date.today().year+1)] +YEAR_CHOICES = [(r, r) for r in range(1837, datetime.date.today().year + 1)] # Comic read status STATUS_CHOICES = ( - (0, 'Unread'), - (1, 'Partially read'), - (2, 'Completed'), + (0, 'Unread'), + (1, 'Partially read'), + (2, 'Completed'), ) # Creator roles for an issue ROLE_CHOICES = ( - ('artist', 'Artist'), - ('colorist', 'Colorist'), - ('cover', 'Cover'), - ('editor', 'Editor'), - ('inker', 'Inker'), - ('journalist', 'Journalist'), - ('letterer', 'Letterer'), - ('other', 'Other'), - ('penciler', 'Penciler'), - ('production', 'Production'), - ('writer', 'Writer'), + ('artist', 'Artist'), + ('colorist', 'Colorist'), + ('cover', 'Cover'), + ('editor', 'Editor'), + ('inker', 'Inker'), + ('journalist', 'Journalist'), + ('letterer', 'Letterer'), + ('other', 'Other'), + ('penciler', 'Penciler'), + ('production', 'Production'), + ('writer', 'Writer'), ) + class Settings(SingletonModel): - api_key = models.CharField( - 'ComicVine API Key', - help_text="A 40-character key provided by ComicVine. This is used to retrieve metadata about your comics. 
You can create a ComicVine API Key at ComicVine's API Page (ComicVine account is required).", - validators=[RegexValidator( - regex='^.{40}$', - message='Length must be 40 characters.', - code='nomatch' - )], - max_length=40, - blank=True - ) - - def __str__(self): - return "Settings" + api_key = models.CharField( + 'ComicVine API Key', + help_text="A 40-character key provided by ComicVine. This is used to retrieve metadata about your comics. You can create a ComicVine API Key at ComicVine's API Page (ComicVine account is required).", + validators=[RegexValidator( + regex='^.{40}$', + message='Length must be 40 characters.', + code='nomatch' + )], + max_length=40, + blank=True + ) + + def __str__(self): + return "Settings" + class Arc(models.Model): - cvid = models.CharField(max_length=15) - cvurl = models.URLField(max_length=200) - name = models.CharField('Arc name', max_length=200) - desc = models.TextField('Description', max_length=500, blank=True) - image = models.FilePathField('Image file path', path="media/images", blank=True) + cvid = models.CharField(max_length=15) + cvurl = models.URLField(max_length=200) + name = models.CharField('Arc name', max_length=200) + desc = models.TextField('Description', max_length=500, blank=True) + image = models.FilePathField( + 'Image file path', path="media/images", blank=True) + + def __str__(self): + return self.name - def __str__(self): - return self.name class Team(models.Model): - cvid = models.CharField(max_length=15) - cvurl = models.URLField(max_length=200) - name = models.CharField('Team name', max_length=200) - desc = models.TextField('Description', max_length=500, blank=True) - image = models.FilePathField('Image file path', path="media/images", blank=True) + cvid = models.CharField(max_length=15) + cvurl = models.URLField(max_length=200) + name = models.CharField('Team name', max_length=200) + desc = models.TextField('Description', max_length=500, blank=True) + image = models.FilePathField( + 'Image file path', 
path="media/images", blank=True) + + def __str__(self): + return self.name - def __str__(self): - return self.name class Character(models.Model): - cvid = models.CharField(max_length=15) - cvurl = models.URLField(max_length=200) - name = models.CharField('Character name', max_length=200) - desc = models.TextField('Description', max_length=500, blank=True) - teams = models.ManyToManyField(Team, blank=True) - image = models.FilePathField('Image file path', path="media/images", blank=True) + cvid = models.CharField(max_length=15) + cvurl = models.URLField(max_length=200) + name = models.CharField('Character name', max_length=200) + desc = models.TextField('Description', max_length=500, blank=True) + teams = models.ManyToManyField(Team, blank=True) + image = models.FilePathField( + 'Image file path', path="media/images", blank=True) + + def __str__(self): + return self.name - def __str__(self): - return self.name class Creator(models.Model): - cvid = models.CharField(max_length=15) - cvurl = models.URLField(max_length=200) - name = models.CharField('Creator name', max_length=200) - desc = models.TextField('Description', max_length=500, blank=True) - image = models.FilePathField('Image file path', path="media/images", blank=True) + cvid = models.CharField(max_length=15) + cvurl = models.URLField(max_length=200) + name = models.CharField('Creator name', max_length=200) + desc = models.TextField('Description', max_length=500, blank=True) + image = models.FilePathField( + 'Image file path', path="media/images", blank=True) + + def __str__(self): + return self.name - def __str__(self): - return self.name class Publisher(models.Model): - cvid = models.CharField(max_length=15) - cvurl = models.URLField(max_length=200) - name = models.CharField('Series name', max_length=200) - desc = models.TextField('Description', max_length=500, blank=True) - logo = models.FilePathField('Logo file path', path="media/images", blank=True) + cvid = models.CharField(max_length=15) + cvurl = 
models.URLField(max_length=200) + name = models.CharField('Series name', max_length=200) + desc = models.TextField('Description', max_length=500, blank=True) + logo = models.FilePathField( + 'Logo file path', path="media/images", blank=True) + + def __str__(self): + return self.name - def __str__(self): - return self.name class Series(models.Model): - cvid = models.CharField(max_length=15, blank=True) - cvurl = models.URLField(max_length=200, blank=True) - name = models.CharField('Series name', max_length=200) - publisher = models.ForeignKey(Publisher, on_delete=models.CASCADE, null=True, blank=True) - year = models.PositiveSmallIntegerField('year', choices=YEAR_CHOICES, default=datetime.datetime.now().year, blank=True) - desc = models.TextField('Description', max_length=500, blank=True) + cvid = models.CharField(max_length=15, blank=True) + cvurl = models.URLField(max_length=200, blank=True) + name = models.CharField('Series name', max_length=200) + publisher = models.ForeignKey( + Publisher, on_delete=models.CASCADE, null=True, blank=True) + year = models.PositiveSmallIntegerField( + 'year', choices=YEAR_CHOICES, default=datetime.datetime.now().year, blank=True) + desc = models.TextField('Description', max_length=500, blank=True) - def __str__(self): - return self.name + def __str__(self): + return self.name - def issue_numerical_order_set(self): - return self.issue_set.all().order_by('number') + def issue_numerical_order_set(self): + return self.issue_set.all().order_by('number') - def issue_count(self): - return self.issue_set.all().count() + def issue_count(self): + return self.issue_set.all().count() - def unread_issue_count(self): - return self.issue_set.exclude(status=2).count() + def unread_issue_count(self): + return self.issue_set.exclude(status=2).count() + + class Meta: + verbose_name_plural = "Series" - class Meta: - verbose_name_plural = "Series" class Issue(models.Model): - cvid = models.CharField('ComicVine ID', max_length=15, blank=True) - cvurl = 
models.URLField(max_length=200, blank=True) - series = models.ForeignKey(Series, on_delete=models.CASCADE, blank=True) - name = models.CharField('Issue name', max_length=200, blank=True) - number = models.PositiveSmallIntegerField('Issue number') - date = models.DateField('Cover date', blank=True) - desc = models.TextField('Description', max_length=500, blank=True) - arcs = models.ManyToManyField(Arc, blank=True) - characters = models.ManyToManyField(Character, blank=True) - teams = models.ManyToManyField(Team, blank=True) - file = models.FilePathField('File path', path="files/", recursive=True) - cover = models.FilePathField('Cover file path', path="media/images", blank=True) - status = models.PositiveSmallIntegerField('Status', choices=STATUS_CHOICES, default=0, blank=True) - leaf = models.PositiveSmallIntegerField(editable=False, default=1, blank=True) - page_count = models.PositiveSmallIntegerField(editable=False, default=1, blank=True) - - def __str__(self): - return self.series.name + ' #' + str(self.number) - - def get_absolute_url(self): - return reverse('author-detail', kwargs={'pk': self.pk}) - - def page_set(self): - comicfilehandler = ComicFileHandler() - comic = comicfilehandler.extract_comic(self.file, self.id) - - return comic + cvid = models.CharField('ComicVine ID', max_length=15, blank=True) + cvurl = models.URLField(max_length=200, blank=True) + series = models.ForeignKey(Series, on_delete=models.CASCADE, blank=True) + name = models.CharField('Issue name', max_length=200, blank=True) + number = models.PositiveSmallIntegerField('Issue number') + date = models.DateField('Cover date', blank=True) + desc = models.TextField('Description', max_length=500, blank=True) + arcs = models.ManyToManyField(Arc, blank=True) + characters = models.ManyToManyField(Character, blank=True) + teams = models.ManyToManyField(Team, blank=True) + file = models.FilePathField('File path', path="files/", recursive=True) + cover = models.FilePathField( + 'Cover file path', 
path="media/images", blank=True) + status = models.PositiveSmallIntegerField( + 'Status', choices=STATUS_CHOICES, default=0, blank=True) + leaf = models.PositiveSmallIntegerField( + editable=False, default=1, blank=True) + page_count = models.PositiveSmallIntegerField( + editable=False, default=1, blank=True) + + def __str__(self): + return self.series.name + ' #' + str(self.number) + + def get_absolute_url(self): + return reverse('author-detail', kwargs={'pk': self.pk}) + + def page_set(self): + comicfilehandler = ComicFileHandler() + comic = comicfilehandler.extract_comic(self.file, self.id) + + return comic + class Roles(models.Model): - creator = models.ForeignKey(Creator, on_delete=models.CASCADE) - issue = models.ForeignKey(Issue, on_delete=models.CASCADE) - roles = MultiSelectField(choices=ROLE_CHOICES) + creator = models.ForeignKey(Creator, on_delete=models.CASCADE) + issue = models.ForeignKey(Issue, on_delete=models.CASCADE) + roles = MultiSelectField(choices=ROLE_CHOICES) - def __str__(self): - return self.issue.series.name + ' #' + str(self.issue.number) + ' - ' + self.creator.name + def __str__(self): + return self.issue.series.name + ' #' + str(self.issue.number) + ' - ' + self.creator.name - class Meta: - verbose_name_plural = "Roles" + class Meta: + verbose_name_plural = "Roles" From 8510576218b4e453d87bb730a4407d58f8e436a3 Mon Sep 17 00:00:00 2001 From: Brian Pepple Date: Tue, 7 Nov 2017 17:17:23 -0500 Subject: [PATCH 2/3] pep8 comicimport.py to fix formatting. 
--- comics/utils/comicimporter.py | 2005 +++++++++++++++++---------------- 1 file changed, 1020 insertions(+), 985 deletions(-) diff --git a/comics/utils/comicimporter.py b/comics/utils/comicimporter.py index 7bba629..09680cd 100644 --- a/comics/utils/comicimporter.py +++ b/comics/utils/comicimporter.py @@ -1,4 +1,10 @@ -import json, os, datetime, re, requests, requests_cache, logging +import json +import os +import datetime +import re +import requests +import requests_cache +import logging from urllib.request import urlretrieve from urllib.parse import quote_plus, unquote_plus from comics.models import Arc, Character, Creator, Team, Publisher, Series, Issue, Roles, Settings @@ -7,1055 +13,1084 @@ from fuzzywuzzy import fuzz -class ComicImporter(object): - - def __init__(self): - # Configure logging - logging.getLogger("requests").setLevel(logging.WARNING) - self.logger = logging.getLogger('tenma') - - # Setup requests caching - requests_cache.install_cache('./media/CACHE/comicvine-cache', expire_after=1800) - requests_cache.core.remove_expired_responses() - - # Set basic reusable strings - self.api_key = Settings.get_solo().api_key - self.directory_path = 'files' - - # API Strings - self.baseurl = 'https://comicvine.gamespot.com/api/' - self.imageurl = 'https://comicvine.gamespot.com/api/image/' - self.base_params = { 'format': 'json', 'api_key': self.api_key } - self.headers = { 'user-agent': 'tenma' } - - # API field strings - self.arc_fields = 'deck,description,id,image,name,site_detail_url' - self.character_fields = 'deck,description,id,image,name,site_detail_url' - self.creator_fields = 'deck,description,id,image,name,site_detail_url' - self.issue_fields = 'api_detail_url,character_credits,cover_date,deck,description,id,image,issue_number,name,person_credits,site_detail_url,story_arc_credits,team_credits,volume' - self.publisher_fields = 'deck,description,id,image,name,site_detail_url' - self.query_issue_fields ='cover_date,id,issue_number,name,volume' - 
self.query_issue_limit = '100' - self.series_fields = 'api_detail_url,deck,description,id,name,publisher,site_detail_url,start_year' - self.team_fields = 'characters,deck,description,id,image,name,site_detail_url' - - # International reprint publishers - # Ordered by # of issues (est.) for quick matching. - self.int_pubs = [ - 2350, # Panini (21.5k) - 2812, # Marvel UK (4.2k) - 2094, # Abril (2.1k) - 2319, # Planeta DeAgostini (2.1k) - 2903, # Ediciones Zinco (0.7k) - 1133, # Semic As (0.3k) - 2961, # Marvel Italia (0.04k) - ] - - #================================================================================================== - - def import_comic_files(self): - ''' Main entry point to import comic files. ''' - - excluded = Issue.objects.values_list('file', flat=True).order_by('file') - self._process_dir(self.directory_path, excluded) - - #================================================================================================== - - def _process_dir(self, path, excluded): - for entry in os.scandir(path): - # If file, process issue. If directory, process series. 
- if entry.is_file(): - # Check for unprocessed files: - if entry.path not in excluded: - # Check comic file validity - if utils.valid_comic_file(entry.name): - # Attempt to find match - cvid = '' - if self.api_key != '': - cvid = self._find_issue_match(entry.name) - if cvid != '': - # Process issue with ComicVine - try: - self._process_issue(entry.path, cvid) - except Exception: - self.logger.exception('An error occurred while processing %s' % entry.path) - else: - # Process issue without ComicVine - try: - self._process_issue_without_cvid(entry.path) - except Exception: - self.logger.exception('An error occurred while processing %s' % entry.path) - else: - self._process_dir(entry.path, excluded) - - #================================================================================================== - - def reprocess_issue(self, issue_id): - ''' Reprocess an existing issue in the comics directories. ''' - - issue = Issue.objects.get(id=issue_id) - cvid = '' - - # Check if there's already a cvid. - if issue.cvid and issue.cvid != '': - cvid = issue.cvid - else: - # Attempt to find match - if self.api_key != '': - cvid = self._find_issue_match(issue.file) - else: - cvid = '' - - # Update issue - if cvid != '': - # Process the issue with ComicVine - self._process_issue(issue.file, cvid) - else: - self._reprocess_issue_without_cvid(issue.id) - - #================================================================================================== - - def _find_issue_match(self, filename): - ''' - Try to find a match in ComicVine for an issue. - - Returns a ComicVine ID. 
- ''' - - # Initialize response - found_issue = None - cvid = '' - - # Attempt to extract series name, issue number, and year - extracted = fnameparser.extract(filename) - series_name = utils.remove_special_characters(extracted[0]) - series_name_url = quote_plus(series_name) - issue_number = extracted[1] if extracted[1] else '1' - issue_year = extracted[2] - - # First check if there's already a series locally - matching_series = Series.objects.filter(name=series_name) - - if matching_series: - if not matching_series[0].cvid == '': - found_issue = self._find_match_with_series(matching_series[0].cvid, issue_number) - - if found_issue is None: - # Query Parameters - query_params = self.base_params - query_params['resources'] = 'issue' - query_params['field_list'] = self.query_issue_fields - query_params['limit'] = self.query_issue_limit - - # Check for series name and issue number, or just series name - if series_name and issue_number: - query_params['query'] = series_name + ' ' + issue_number - query_response = requests.get( - self.baseurl + 'search', - params=query_params, - headers=self.headers - ).json() - elif series_name: - query_params['query'] = series_name_url - query_response = requests.get( - self.baseurl + 'search', - params=query_params, - headers=self.headers - ).json() - - best_option_list = [] - - # Try to find the closest match. 
- for issue in query_response['results']: - item_year = datetime.date.today().year - item_number = 1 - item_name = '' - item_pub_id = '' - - if 'cover_date' in issue: - if issue['cover_date']: - item_year = issue['cover_date'][0:4] - if 'issue_number' in issue: - if issue['issue_number']: - item_number = issue['issue_number'] - if 'volume' in issue: - if 'name' in issue['volume']: - if issue['volume']['name']: - item_name = issue['volume']['name'] - item_name = utils.remove_special_characters(item_name) - - # Get publisher ID - pub_check_params = self.base_params - pub_check_params['field_list'] = 'publisher' - pub_check_response = requests.get( - self.baseurl + 'volume/4050-' + str(issue['volume']['id']), - params=pub_check_params, - headers=self.headers, - ).json() - - if 'publisher' in pub_check_response['results']: - if pub_check_response['results']['publisher'] is not None: - item_pub_id = pub_check_response['results']['publisher']['id'] - - # Get the match score (0-5) - if series_name: - # Fuzzy match the series name. - # Gives a score between 0 and 2. - score = (fuzz.ratio(item_name.lower(), series_name.lower()) + fuzz.partial_ratio(item_name.lower(), series_name.lower())) / 100 - - # If the issue number is the same, add 1 point. - if item_number == issue_number: - score += 1 - - # If the year is the same, add 2 points. - if issue_year != '': - if item_year == issue_year: - score += 2 - - # If the publisher is an international reprint, subtract a point. - if item_pub_id != '': - if item_pub_id in self.int_pubs: - score = score - 1 if score > 1 else 0 - - # Add the issue and it's score to the list. - best_option_list.insert(0, { - 'score': score, - 'issue': issue, - }) - - # Sort the list by score, and pick the top scoring issue. 
- best_option_list.sort(key=lambda x: x['score'], reverse=True) - found_issue = best_option_list[0]['issue'] if best_option_list else None - - cvid = found_issue['id'] if found_issue else '' - - if found_issue is not None: - if 'volume' in found_issue: - if 'name' in found_issue['volume']: - if found_issue['volume']['name']: - series = found_issue['volume']['name'] - elif matching_series: - series = matching_series[0].name - if 'issue_number' in found_issue: - if found_issue['issue_number']: - number = found_issue['issue_number'] - else: - number = '' - self.logger.info('\"%(filename)s\" was matched on Comic Vine as \"%(series)s - #%(number)s\" (%(CVID)s)' % { - 'filename': filename, - 'series': series, - 'number': number, - 'CVID': cvid - }) - else: - self.logger.warning('No match was found for \"%s\" on Comic Vine.' % filename) - - return cvid - - #================================================================================================== - - def _find_match_with_series(self, series_cvid, issue_number): - ''' - Try to retrieve a match based on an existing series name. - - Returns an issue from list. - ''' - - found_issue = None - - if issue_number: - # Query Parameters - query_params = self.base_params - query_params['field_list'] = 'issues,name' - - # Attempt to find issue based on extracted Series Name and Issue Number - query_response = requests.get( - self.baseurl + 'volume/4050-' + str(series_cvid), - params=query_params, - headers=self.headers, - ).json() - - # Try to find the closest match. - for issue in query_response['results']['issues']: - item_number = issue['issue_number'] if issue['issue_number'] else '' - if item_number == issue_number: - found_issue = issue - - return found_issue - - #================================================================================================== - - def _process_issue_without_cvid(self, filepath): - ''' Create an issue without a ComicVine ID. 
''' - - # Make sure the issue hadn't already been added - matching_issue = Issue.objects.filter(file=filepath) - - filename = os.path.basename(filepath) - - if not matching_issue: - # 1. Attempt to extract series name, issue number, and year - extracted = fnameparser.extract(filepath) - series_name = extracted[0] - issue_number = extracted[1] - issue_year = extracted[2] - - # 2. Set Issue Information: - issue = Issue() - issue.file = filepath - issue.number = issue_number if issue_number else 1 - issue.date = issue_year + '-01-01' if issue_year else datetime.date.today() - - cfh = ComicFileHandler() - issue.cover = cfh.extract_cover(filepath) - issue.page_count = cfh.get_page_count(filepath) - - # 3. Set Series Information: - matching_series = Series.objects.filter(name=series_name) - - if not matching_series: - series = Series() - series.name = series_name - series.save() - issue.series = series - else: - issue.series = matching_series[0] - - # 4. Save Issue. - issue.save() - else: - self._reprocess_issue_without_cvid(matching_issue[0].id) - - self.logger.info('\"%(filename)s\" was processed successfully as \"%(series)s - #%(number)s\"' % { - 'filename': filename, - 'series': issue.series.name, - 'number': issue.number - }) - - #================================================================================================== - - def _reprocess_issue_without_cvid(self, issue_id): - ''' Create an issue without a ComicVine ID. ''' - - # Make sure the issue exists - issue = Issue.objects.get(id=issue_id) - - if issue: - # 1. Attempt to extract series name, issue number, year and cover. - extracted = fnameparser.extract(issue.file) - series_name = extracted[0] - issue_number = extracted[1] - issue_year = extracted[2] - - cfh = ComicFileHandler() - issue_cover = cfh.extract_cover(issue.file) - issue.page_count = cfh.get_page_count(issue.file) - - # 2. 
Update Issue information: - Issue.objects.filter(id=issue_id).update( - number=issue_number if issue_number else 1, - date=issue_year + '-01-01' if issue_year else datetime.date.today(), - cover=issue_cover, - ) - - # 3. Update Series information: - if Series.objects.get(id=issue.series.id): - Series.objects.filter(id=issue.series.id).update( - name=series_name, - ) - else: - series = Series() - series.name = series_name - series.save() - issue.series = series - issue.save() - - #================================================================================================== - - def _process_issue(self, filename, cvid): - ''' Creates or updates metadata from ComicVine for an Issue. ''' - - # 1. Make initial API call - # Query Parameters - issue_params = self.base_params - issue_params['field_list'] = self.issue_fields - - response_issue = requests.get( - self.baseurl + 'issue/4000-' + str(cvid), - params=issue_params, - headers=self.headers, - ).json() - - # 2. Set Series - matching_series = Series.objects.filter(cvid=response_issue['results']['volume']['id']) - - if not matching_series: - series = self._create_series(response_issue['results']['volume']['api_detail_url']) - else: - series = self._update_series(matching_series[0].id, response_issue['results']['volume']['api_detail_url']) - - # 3. Set Issue - matching_issue = Issue.objects.filter(file=filename) - - if not matching_issue: - issue = self._create_issue(filename, response_issue['results']['api_detail_url'], series.id) - else: - issue = self._update_issue(matching_issue[0].id, response_issue['results']['api_detail_url'], series.id) - - # 4. 
Set Publisher - # Query Parameters - series_params = self.base_params - series_params['field_list'] = 'publisher' - - response_series = requests.get( - response_issue['results']['volume']['api_detail_url'], - params=series_params, - headers=self.headers, - ).json() - - matching_publisher = Publisher.objects.filter(cvid=response_series['results']['publisher']['id']) - - if not matching_publisher: - self._create_publisher(response_series['results']['publisher']['api_detail_url'], issue.series.id) - else: - self._update_publisher(matching_publisher[0].id, response_series['results']['publisher']['api_detail_url'], issue.series.id) - - # 5. Set Arcs - for story_arc in response_issue['results']['story_arc_credits']: - matching_arc = Arc.objects.filter(cvid=story_arc['id']) - if not matching_arc: - self._create_arc(story_arc['api_detail_url'], issue.id) - else: - issue.arcs.add(self._update_arc(matching_arc[0].id, story_arc['api_detail_url'])) - - # 6. Set Characters - for character in response_issue['results']['character_credits']: - matching_character = Character.objects.filter(cvid=character['id']) - if not matching_character: - self._create_character(character['api_detail_url'], issue.id) - else: - issue.characters.add(self._update_character(matching_character[0].id, character['api_detail_url'])) - - # 7. Set Creators - for person in response_issue['results']['person_credits']: - matching_creator = Creator.objects.filter(cvid=person['id']) - if not matching_creator: - self._create_creator(person['api_detail_url'], re.sub(' ', '', person['role']), issue.id) - else: - Roles.objects.create( - creator=matching_creator[0], - issue=issue, - roles=re.sub(' ', '', person['role']) - ) - - # 8. 
Set Teams - for team in response_issue['results']['team_credits']: - matching_team = Team.objects.filter(cvid=team['id']) - if not matching_team: - self._create_team(team['api_detail_url'], issue.id) - else: - issue.teams.add(self._update_team(matching_team[0].id, team['api_detail_url'])) - - self.logger.info('\"%(filename)s\" was processed successfully as \"%(series)s - #%(number)s\" (%(CVID)s)' % { - 'filename': filename, - 'series': series.name, - 'number': issue.number, - 'CVID': issue.cvid - }) - - #================================================================================================== - - def _get_object_data(self, response): - ''' - Gathers object data from a response and tests each value to make sure - it exists in the response before trying to set it. - - CVID and CVURL will always exist in a ComicVine response, so there - is no need to verify this data. - - Returns a dictionary with all the gathered data. - ''' - - # Get Name - name = '' - - if 'name' in response: - if response['name']: - name = response['name'] - - # Get Start Year (only exists for Series objects) - year = '' - - if 'start_year' in response: - if response['start_year']: - year = response['start_year'] - - # Get Number (only exists for Issue objects) - number = '' - - if 'issue_number' in response: - if response['issue_number']: - number = response['issue_number'] - - # Get Date (only exists for Issue objects) - date = datetime.date.today() - - if 'cover_date' in response: - if response['cover_date']: - date = response['cover_date'] - - - # Get Description (Favor short description if available) - desc = '' - - if 'deck' in response: - if response['deck']: - desc = response['deck'] - if desc == '': - if 'description' in response: - if response['description']: - desc = utils.parse_CV_HTML(response['description']) - - # Get Image - image = '' - - if 'image' in response: - if response['image']: - image_url = self.imageurl + response['image']['super_url'].rsplit('/', 1)[-1] - 
image_filename = unquote_plus(image_url.split('/')[-1]) - if image_filename != '1-male-good-large.jpg' and not re.match(".*question_mark_large.*.jpg", image_filename): - image = utils.test_image(urlretrieve(image_url, 'media/images/' + image_filename)[0]) - - # Create data object - data = { - 'cvid': response['id'], # Always exists - 'cvurl': response['site_detail_url'], # Always exists - 'name': name, - 'year': year, - 'number': number, - 'date': date, - 'desc': desc, - 'image': image, - } - - return data - - #================================================================================================== - - def _create_arc(self, api_url, issue_id): - ''' - Creates Arc from ComicVine API URL and adds it to - it's corresponding Issue. - - Returns the Arc object created. - ''' - - # Request and Response - params = self.base_params - params['field_list'] = self.arc_fields - - response = requests.get( - api_url, - params=params, - headers=self.headers, - ).json() - - data = self._get_object_data(response['results']) - issue = Issue.objects.get(id=issue_id) - - # Create Arc - a = issue.arcs.create( - cvid=data['cvid'], - cvurl=data['cvurl'], - name=data['name'], - desc=data['desc'], - image=data['image'], - ) - - return a - - #================================================================================================== +class ComicImporter(object): - def _create_character(self, api_url, issue_id): - ''' - Creates Character from ComicVine API URL and adds it to - it's corresponding Issue. - - Returns the Character object created. 
- ''' - - # Request and Response - params = self.base_params - params['field_list'] = self.character_fields - - response = requests.get( - api_url, - params=params, - headers=self.headers, - ).json() - - data = self._get_object_data(response['results']) - - issue = Issue.objects.get(id=issue_id) - - # Create Character - ch = issue.characters.create( - cvid=data['cvid'], - cvurl=data['cvurl'], - name=data['name'], - desc=data['desc'], - image=data['image'], - ) - - return ch - - #================================================================================================== - - def _create_creator(self, api_url, roles, issue_id): - ''' - Creates Creator from ComicVine API URL and adds it to - it's corresponding Issue. - - Returns the Creator object created. - ''' - - # Request and Response - params = self.base_params - params['field_list'] = self.creator_fields - - response = requests.get( - api_url, - params=params, - headers=self.headers, - ).json() - - data = self._get_object_data(response['results']) - - issue = Issue.objects.get(id=issue_id) - - # Create Creator - cr = Creator.objects.create( - cvid=data['cvid'], - cvurl=data['cvurl'], - name=data['name'], - desc=data['desc'], - image=data['image'], - ) - - # Create Role in issue - r = Roles.objects.create( - creator=cr, - issue=issue, - roles=roles - ) - - return cr - - #================================================================================================== - - def _create_issue(self, file, api_url, series_id): - ''' - Creates Issue from ComicVine API URL and adds the - corresponding Series. - - Returns the Issue object created. 
- ''' - cfh = ComicFileHandler() - - # Request and Response - params = self.base_params - params['field_list'] = self.issue_fields - - response = requests.get( - api_url, - params=params, - headers=self.headers, - ).json() - - data = self._get_object_data(response['results']) - - series = Series.objects.get(id=series_id) - - # Create Issue - i = Issue.objects.create( - cvid=data['cvid'], - cvurl=data['cvurl'], - name=data['name'], - desc=data['desc'], - number=data['number'], - date=data['date'], - file=file, - series=series, - cover=data['image'], - page_count=cfh.get_page_count(file), - ) - - return i - - #================================================================================================== - - def _create_publisher(self, api_url, series_id): - ''' - Creates Publisher from ComicVine API URL and adds it to - it's corresponding Series. - - Returns the Publisher object created. - ''' - - # Request and Response - params = self.base_params - params['field_list'] = self.publisher_fields - - response = requests.get( - api_url, - params=params, - headers=self.headers, - ).json() - - data = self._get_object_data(response['results']) - - # Create Publisher - p = Publisher.objects.create( - cvid=data['cvid'], - cvurl=data['cvurl'], - name=data['name'], - desc=data['desc'], - logo=data['image'], - ) - - # Add Publisher to Series - series = Series.objects.get(id=series_id) - series.publisher = p - series.save() - - return p - - #================================================================================================== - - def _create_team(self, api_url, issue_id): - ''' - Creates Team from ComicVine API URL and adds it to - it's corresponding Issue. - - Returns the Team object created. 
- ''' - - # Request and Response - params = self.base_params - params['field_list'] = self.team_fields - - response = requests.get( - api_url, - params=params, - headers=self.headers, - ).json() - - data = self._get_object_data(response['results']) - - issue = Issue.objects.get(id=issue_id) - - # Create Team - t = issue.teams.create( - cvid=data['cvid'], - cvurl=data['cvurl'], - name=data['name'], - desc=data['desc'], - image=data['image'], - ) - - # Add existing Characters to Team - for character in response['results']['characters']: - matching_character = Character.objects.filter(cvid=character['id']) - if matching_character: - team_item = Team.objects.filter(cvid=t.cvid) - matching_character[0].teams.add(team_item[0]) - - return t + def __init__(self): + # Configure logging + logging.getLogger("requests").setLevel(logging.WARNING) + self.logger = logging.getLogger('tenma') + + # Setup requests caching + requests_cache.install_cache( + './media/CACHE/comicvine-cache', expire_after=1800) + requests_cache.core.remove_expired_responses() + + # Set basic reusable strings + self.api_key = Settings.get_solo().api_key + self.directory_path = 'files' + + # API Strings + self.baseurl = 'https://comicvine.gamespot.com/api/' + self.imageurl = 'https://comicvine.gamespot.com/api/image/' + self.base_params = {'format': 'json', 'api_key': self.api_key} + self.headers = {'user-agent': 'tenma'} + + # API field strings + self.arc_fields = 'deck,description,id,image,name,site_detail_url' + self.character_fields = 'deck,description,id,image,name,site_detail_url' + self.creator_fields = 'deck,description,id,image,name,site_detail_url' + self.issue_fields = 'api_detail_url,character_credits,cover_date,deck,description,id,image,issue_number,name,person_credits,site_detail_url,story_arc_credits,team_credits,volume' + self.publisher_fields = 'deck,description,id,image,name,site_detail_url' + self.query_issue_fields = 'cover_date,id,issue_number,name,volume' + self.query_issue_limit = 
'100' + self.series_fields = 'api_detail_url,deck,description,id,name,publisher,site_detail_url,start_year' + self.team_fields = 'characters,deck,description,id,image,name,site_detail_url' + + # International reprint publishers + # Ordered by # of issues (est.) for quick matching. + self.int_pubs = [ + 2350, # Panini (21.5k) + 2812, # Marvel UK (4.2k) + 2094, # Abril (2.1k) + 2319, # Planeta DeAgostini (2.1k) + 2903, # Ediciones Zinco (0.7k) + 1133, # Semic As (0.3k) + 2961, # Marvel Italia (0.04k) + ] + + #========================================================================= + + def import_comic_files(self): + ''' Main entry point to import comic files. ''' + + excluded = Issue.objects.values_list( + 'file', flat=True).order_by('file') + self._process_dir(self.directory_path, excluded) + + #========================================================================= + + def _process_dir(self, path, excluded): + for entry in os.scandir(path): + # If file, process issue. If directory, process series. + if entry.is_file(): + # Check for unprocessed files: + if entry.path not in excluded: + # Check comic file validity + if utils.valid_comic_file(entry.name): + # Attempt to find match + cvid = '' + if self.api_key != '': + cvid = self._find_issue_match(entry.name) + if cvid != '': + # Process issue with ComicVine + try: + self._process_issue(entry.path, cvid) + except Exception: + self.logger.exception( + 'An error occurred while processing %s' % entry.path) + else: + # Process issue without ComicVine + try: + self._process_issue_without_cvid( + entry.path) + except Exception: + self.logger.exception( + 'An error occurred while processing %s' % entry.path) + else: + self._process_dir(entry.path, excluded) + + #========================================================================= + + def reprocess_issue(self, issue_id): + ''' Reprocess an existing issue in the comics directories. 
''' + + issue = Issue.objects.get(id=issue_id) + cvid = '' + + # Check if there's already a cvid. + if issue.cvid and issue.cvid != '': + cvid = issue.cvid + else: + # Attempt to find match + if self.api_key != '': + cvid = self._find_issue_match(issue.file) + else: + cvid = '' + + # Update issue + if cvid != '': + # Process the issue with ComicVine + self._process_issue(issue.file, cvid) + else: + self._reprocess_issue_without_cvid(issue.id) + + #========================================================================= + + def _find_issue_match(self, filename): + ''' + Try to find a match in ComicVine for an issue. + + Returns a ComicVine ID. + ''' + + # Initialize response + found_issue = None + cvid = '' + + # Attempt to extract series name, issue number, and year + extracted = fnameparser.extract(filename) + series_name = utils.remove_special_characters(extracted[0]) + series_name_url = quote_plus(series_name) + issue_number = extracted[1] if extracted[1] else '1' + issue_year = extracted[2] + + # First check if there's already a series locally + matching_series = Series.objects.filter(name=series_name) + + if matching_series: + if not matching_series[0].cvid == '': + found_issue = self._find_match_with_series( + matching_series[0].cvid, issue_number) + + if found_issue is None: + # Query Parameters + query_params = self.base_params + query_params['resources'] = 'issue' + query_params['field_list'] = self.query_issue_fields + query_params['limit'] = self.query_issue_limit + + # Check for series name and issue number, or just series name + if series_name and issue_number: + query_params['query'] = series_name + ' ' + issue_number + query_response = requests.get( + self.baseurl + 'search', + params=query_params, + headers=self.headers + ).json() + elif series_name: + query_params['query'] = series_name_url + query_response = requests.get( + self.baseurl + 'search', + params=query_params, + headers=self.headers + ).json() + + best_option_list = [] + + # Try to 
find the closest match. + for issue in query_response['results']: + item_year = datetime.date.today().year + item_number = 1 + item_name = '' + item_pub_id = '' + + if 'cover_date' in issue: + if issue['cover_date']: + item_year = issue['cover_date'][0:4] + if 'issue_number' in issue: + if issue['issue_number']: + item_number = issue['issue_number'] + if 'volume' in issue: + if 'name' in issue['volume']: + if issue['volume']['name']: + item_name = issue['volume']['name'] + item_name = utils.remove_special_characters( + item_name) + + # Get publisher ID + pub_check_params = self.base_params + pub_check_params['field_list'] = 'publisher' + pub_check_response = requests.get( + self.baseurl + 'volume/4050-' + + str(issue['volume']['id']), + params=pub_check_params, + headers=self.headers, + ).json() + + if 'publisher' in pub_check_response['results']: + if pub_check_response['results']['publisher'] is not None: + item_pub_id = pub_check_response[ + 'results']['publisher']['id'] + + # Get the match score (0-5) + if series_name: + # Fuzzy match the series name. + # Gives a score between 0 and 2. + score = (fuzz.ratio(item_name.lower(), series_name.lower( + )) + fuzz.partial_ratio(item_name.lower(), series_name.lower())) / 100 + + # If the issue number is the same, add 1 point. + if item_number == issue_number: + score += 1 + + # If the year is the same, add 2 points. + if issue_year != '': + if item_year == issue_year: + score += 2 + + # If the publisher is an international reprint, subtract a + # point. + if item_pub_id != '': + if item_pub_id in self.int_pubs: + score = score - 1 if score > 1 else 0 + + # Add the issue and it's score to the list. + best_option_list.insert(0, { + 'score': score, + 'issue': issue, + }) + + # Sort the list by score, and pick the top scoring issue. 
+ best_option_list.sort(key=lambda x: x['score'], reverse=True) + found_issue = best_option_list[0][ + 'issue'] if best_option_list else None + + cvid = found_issue['id'] if found_issue else '' + + if found_issue is not None: + if 'volume' in found_issue: + if 'name' in found_issue['volume']: + if found_issue['volume']['name']: + series = found_issue['volume']['name'] + elif matching_series: + series = matching_series[0].name + if 'issue_number' in found_issue: + if found_issue['issue_number']: + number = found_issue['issue_number'] + else: + number = '' + self.logger.info('\"%(filename)s\" was matched on Comic Vine as \"%(series)s - #%(number)s\" (%(CVID)s)' % { + 'filename': filename, + 'series': series, + 'number': number, + 'CVID': cvid + }) + else: + self.logger.warning( + 'No match was found for \"%s\" on Comic Vine.' % filename) + + return cvid + + #========================================================================= + + def _find_match_with_series(self, series_cvid, issue_number): + ''' + Try to retrieve a match based on an existing series name. + + Returns an issue from list. + ''' + + found_issue = None + + if issue_number: + # Query Parameters + query_params = self.base_params + query_params['field_list'] = 'issues,name' + + # Attempt to find issue based on extracted Series Name and Issue + # Number + query_response = requests.get( + self.baseurl + 'volume/4050-' + str(series_cvid), + params=query_params, + headers=self.headers, + ).json() + + # Try to find the closest match. + for issue in query_response['results']['issues']: + item_number = issue['issue_number'] if issue[ + 'issue_number'] else '' + if item_number == issue_number: + found_issue = issue + + return found_issue + + #========================================================================= + + def _process_issue_without_cvid(self, filepath): + ''' Create an issue without a ComicVine ID. 
''' + + # Make sure the issue hadn't already been added + matching_issue = Issue.objects.filter(file=filepath) + + filename = os.path.basename(filepath) + + if not matching_issue: + # 1. Attempt to extract series name, issue number, and year + extracted = fnameparser.extract(filepath) + series_name = extracted[0] + issue_number = extracted[1] + issue_year = extracted[2] + + # 2. Set Issue Information: + issue = Issue() + issue.file = filepath + issue.number = issue_number if issue_number else 1 + issue.date = issue_year + '-01-01' if issue_year else datetime.date.today() + + cfh = ComicFileHandler() + issue.cover = cfh.extract_cover(filepath) + issue.page_count = cfh.get_page_count(filepath) + + # 3. Set Series Information: + matching_series = Series.objects.filter(name=series_name) + + if not matching_series: + series = Series() + series.name = series_name + series.save() + issue.series = series + else: + issue.series = matching_series[0] + + # 4. Save Issue. + issue.save() + else: + self._reprocess_issue_without_cvid(matching_issue[0].id) + + self.logger.info('\"%(filename)s\" was processed successfully as \"%(series)s - #%(number)s\"' % { + 'filename': filename, + 'series': issue.series.name, + 'number': issue.number + }) + + #========================================================================= + + def _reprocess_issue_without_cvid(self, issue_id): + ''' Create an issue without a ComicVine ID. ''' + + # Make sure the issue exists + issue = Issue.objects.get(id=issue_id) + + if issue: + # 1. Attempt to extract series name, issue number, year and cover. + extracted = fnameparser.extract(issue.file) + series_name = extracted[0] + issue_number = extracted[1] + issue_year = extracted[2] + + cfh = ComicFileHandler() + issue_cover = cfh.extract_cover(issue.file) + issue.page_count = cfh.get_page_count(issue.file) + + # 2. 
Update Issue information: + Issue.objects.filter(id=issue_id).update( + number=issue_number if issue_number else 1, + date=issue_year + '-01-01' if issue_year else datetime.date.today(), + cover=issue_cover, + ) + + # 3. Update Series information: + if Series.objects.get(id=issue.series.id): + Series.objects.filter(id=issue.series.id).update( + name=series_name, + ) + else: + series = Series() + series.name = series_name + series.save() + issue.series = series + issue.save() + + #========================================================================= + + def _process_issue(self, filename, cvid): + ''' Creates or updates metadata from ComicVine for an Issue. ''' + + # 1. Make initial API call + # Query Parameters + issue_params = self.base_params + issue_params['field_list'] = self.issue_fields + + response_issue = requests.get( + self.baseurl + 'issue/4000-' + str(cvid), + params=issue_params, + headers=self.headers, + ).json() + + # 2. Set Series + matching_series = Series.objects.filter( + cvid=response_issue['results']['volume']['id']) + + if not matching_series: + series = self._create_series(response_issue['results'][ + 'volume']['api_detail_url']) + else: + series = self._update_series(matching_series[0].id, response_issue[ + 'results']['volume']['api_detail_url']) + + # 3. Set Issue + matching_issue = Issue.objects.filter(file=filename) + + if not matching_issue: + issue = self._create_issue(filename, response_issue['results'][ + 'api_detail_url'], series.id) + else: + issue = self._update_issue(matching_issue[0].id, response_issue[ + 'results']['api_detail_url'], series.id) + + # 4. 
Set Publisher + # Query Parameters + series_params = self.base_params + series_params['field_list'] = 'publisher' + + response_series = requests.get( + response_issue['results']['volume']['api_detail_url'], + params=series_params, + headers=self.headers, + ).json() + + matching_publisher = Publisher.objects.filter( + cvid=response_series['results']['publisher']['id']) + + if not matching_publisher: + self._create_publisher(response_series['results']['publisher'][ + 'api_detail_url'], issue.series.id) + else: + self._update_publisher(matching_publisher[0].id, response_series['results'][ + 'publisher']['api_detail_url'], issue.series.id) + + # 5. Set Arcs + for story_arc in response_issue['results']['story_arc_credits']: + matching_arc = Arc.objects.filter(cvid=story_arc['id']) + if not matching_arc: + self._create_arc(story_arc['api_detail_url'], issue.id) + else: + issue.arcs.add(self._update_arc( + matching_arc[0].id, story_arc['api_detail_url'])) + + # 6. Set Characters + for character in response_issue['results']['character_credits']: + matching_character = Character.objects.filter(cvid=character['id']) + if not matching_character: + self._create_character(character['api_detail_url'], issue.id) + else: + issue.characters.add(self._update_character( + matching_character[0].id, character['api_detail_url'])) + + # 7. Set Creators + for person in response_issue['results']['person_credits']: + matching_creator = Creator.objects.filter(cvid=person['id']) + if not matching_creator: + self._create_creator(person['api_detail_url'], re.sub( + ' ', '', person['role']), issue.id) + else: + Roles.objects.create( + creator=matching_creator[0], + issue=issue, + roles=re.sub(' ', '', person['role']) + ) + + # 8. 
Set Teams + for team in response_issue['results']['team_credits']: + matching_team = Team.objects.filter(cvid=team['id']) + if not matching_team: + self._create_team(team['api_detail_url'], issue.id) + else: + issue.teams.add(self._update_team( + matching_team[0].id, team['api_detail_url'])) + + self.logger.info('\"%(filename)s\" was processed successfully as \"%(series)s - #%(number)s\" (%(CVID)s)' % { + 'filename': filename, + 'series': series.name, + 'number': issue.number, + 'CVID': issue.cvid + }) + + #========================================================================= + + def _get_object_data(self, response): + ''' + Gathers object data from a response and tests each value to make sure + it exists in the response before trying to set it. + + CVID and CVURL will always exist in a ComicVine response, so there + is no need to verify this data. + + Returns a dictionary with all the gathered data. + ''' + + # Get Name + name = '' + + if 'name' in response: + if response['name']: + name = response['name'] + + # Get Start Year (only exists for Series objects) + year = '' + + if 'start_year' in response: + if response['start_year']: + year = response['start_year'] + + # Get Number (only exists for Issue objects) + number = '' + + if 'issue_number' in response: + if response['issue_number']: + number = response['issue_number'] + + # Get Date (only exists for Issue objects) + date = datetime.date.today() + + if 'cover_date' in response: + if response['cover_date']: + date = response['cover_date'] + + # Get Description (Favor short description if available) + desc = '' + + if 'deck' in response: + if response['deck']: + desc = response['deck'] + if desc == '': + if 'description' in response: + if response['description']: + desc = utils.parse_CV_HTML(response['description']) + + # Get Image + image = '' + + if 'image' in response: + if response['image']: + image_url = self.imageurl + \ + response['image']['super_url'].rsplit('/', 1)[-1] + image_filename = 
unquote_plus(image_url.split('/')[-1]) + if image_filename != '1-male-good-large.jpg' and not re.match(".*question_mark_large.*.jpg", image_filename): + image = utils.test_image(urlretrieve( + image_url, 'media/images/' + image_filename)[0]) + + # Create data object + data = { + 'cvid': response['id'], # Always exists + 'cvurl': response['site_detail_url'], # Always exists + 'name': name, + 'year': year, + 'number': number, + 'date': date, + 'desc': desc, + 'image': image, + } + + return data + + #========================================================================= + + def _create_arc(self, api_url, issue_id): + ''' + Creates Arc from ComicVine API URL and adds it to + it's corresponding Issue. + + Returns the Arc object created. + ''' + + # Request and Response + params = self.base_params + params['field_list'] = self.arc_fields + + response = requests.get( + api_url, + params=params, + headers=self.headers, + ).json() + + data = self._get_object_data(response['results']) + + issue = Issue.objects.get(id=issue_id) + + # Create Arc + a = issue.arcs.create( + cvid=data['cvid'], + cvurl=data['cvurl'], + name=data['name'], + desc=data['desc'], + image=data['image'], + ) + + return a + + #========================================================================= + + def _create_character(self, api_url, issue_id): + ''' + Creates Character from ComicVine API URL and adds it to + it's corresponding Issue. + + Returns the Character object created. 
+ ''' + + # Request and Response + params = self.base_params + params['field_list'] = self.character_fields + + response = requests.get( + api_url, + params=params, + headers=self.headers, + ).json() + + data = self._get_object_data(response['results']) + + issue = Issue.objects.get(id=issue_id) + + # Create Character + ch = issue.characters.create( + cvid=data['cvid'], + cvurl=data['cvurl'], + name=data['name'], + desc=data['desc'], + image=data['image'], + ) + + return ch + + #========================================================================= + + def _create_creator(self, api_url, roles, issue_id): + ''' + Creates Creator from ComicVine API URL and adds it to + it's corresponding Issue. + + Returns the Creator object created. + ''' + + # Request and Response + params = self.base_params + params['field_list'] = self.creator_fields + + response = requests.get( + api_url, + params=params, + headers=self.headers, + ).json() + + data = self._get_object_data(response['results']) + + issue = Issue.objects.get(id=issue_id) + + # Create Creator + cr = Creator.objects.create( + cvid=data['cvid'], + cvurl=data['cvurl'], + name=data['name'], + desc=data['desc'], + image=data['image'], + ) + + # Create Role in issue + r = Roles.objects.create( + creator=cr, + issue=issue, + roles=roles + ) + + return cr + + #========================================================================= + + def _create_issue(self, file, api_url, series_id): + ''' + Creates Issue from ComicVine API URL and adds the + corresponding Series. + + Returns the Issue object created. 
+ ''' + cfh = ComicFileHandler() + + # Request and Response + params = self.base_params + params['field_list'] = self.issue_fields + + response = requests.get( + api_url, + params=params, + headers=self.headers, + ).json() + + data = self._get_object_data(response['results']) + + series = Series.objects.get(id=series_id) + + # Create Issue + i = Issue.objects.create( + cvid=data['cvid'], + cvurl=data['cvurl'], + name=data['name'], + desc=data['desc'], + number=data['number'], + date=data['date'], + file=file, + series=series, + cover=data['image'], + page_count=cfh.get_page_count(file), + ) + + return i + + #========================================================================= + + def _create_publisher(self, api_url, series_id): + ''' + Creates Publisher from ComicVine API URL and adds it to + it's corresponding Series. + + Returns the Publisher object created. + ''' + + # Request and Response + params = self.base_params + params['field_list'] = self.publisher_fields + + response = requests.get( + api_url, + params=params, + headers=self.headers, + ).json() + + data = self._get_object_data(response['results']) + + # Create Publisher + p = Publisher.objects.create( + cvid=data['cvid'], + cvurl=data['cvurl'], + name=data['name'], + desc=data['desc'], + logo=data['image'], + ) + + # Add Publisher to Series + series = Series.objects.get(id=series_id) + series.publisher = p + series.save() + + return p + + #========================================================================= + + def _create_team(self, api_url, issue_id): + ''' + Creates Team from ComicVine API URL and adds it to + it's corresponding Issue. + + Returns the Team object created. 
+ ''' + + # Request and Response + params = self.base_params + params['field_list'] = self.team_fields + + response = requests.get( + api_url, + params=params, + headers=self.headers, + ).json() + + data = self._get_object_data(response['results']) + + issue = Issue.objects.get(id=issue_id) + + # Create Team + t = issue.teams.create( + cvid=data['cvid'], + cvurl=data['cvurl'], + name=data['name'], + desc=data['desc'], + image=data['image'], + ) + + # Add existing Characters to Team + for character in response['results']['characters']: + matching_character = Character.objects.filter(cvid=character['id']) + if matching_character: + team_item = Team.objects.filter(cvid=t.cvid) + matching_character[0].teams.add(team_item[0]) + + return t - #================================================================================================== - - def _create_series(self, api_url): - ''' - Creates Series from ComicVine API URL. - - Returns the Series object created. - ''' - - # Request and Response - params = self.base_params - params['field_list'] = self.series_fields + #========================================================================= + + def _create_series(self, api_url): + ''' + Creates Series from ComicVine API URL. + + Returns the Series object created. 
+ ''' + + # Request and Response + params = self.base_params + params['field_list'] = self.series_fields - response = requests.get( - api_url, - params=params, - headers=self.headers, - ).json() + response = requests.get( + api_url, + params=params, + headers=self.headers, + ).json() - data = self._get_object_data(response['results']) - - # Create Series - s = Series.objects.create( - cvid=data['cvid'], - cvurl=data['cvurl'], - name=data['name'], - desc=data['desc'], - year=data['year'], - ) + data = self._get_object_data(response['results']) + + # Create Series + s = Series.objects.create( + cvid=data['cvid'], + cvurl=data['cvurl'], + name=data['name'], + desc=data['desc'], + year=data['year'], + ) - return s + return s - #================================================================================================== + #========================================================================= - def _update_arc(self, obj_id, api_url): - ''' - Updates Arc from ComicVine API URL. + def _update_arc(self, obj_id, api_url): + ''' + Updates Arc from ComicVine API URL. - Returns the Arc object udpated. - ''' + Returns the Arc object udpated. 
+ ''' - # Request and Response - params = self.base_params - params['field_list'] = self.arc_fields + # Request and Response + params = self.base_params + params['field_list'] = self.arc_fields - response = requests.get( - api_url, - params=params, - headers=self.headers, - ).json() + response = requests.get( + api_url, + params=params, + headers=self.headers, + ).json() - data = self._get_object_data(response['results']) + data = self._get_object_data(response['results']) - # Update Arc - Arc.objects.filter(id=obj_id).update( - cvurl=data['cvurl'], - name=data['name'], - desc=data['desc'], - image=data['image'], - ) - - return Arc.objects.get(id=obj_id) + # Update Arc + Arc.objects.filter(id=obj_id).update( + cvurl=data['cvurl'], + name=data['name'], + desc=data['desc'], + image=data['image'], + ) + + return Arc.objects.get(id=obj_id) - #================================================================================================== + #========================================================================= - def _update_character(self, obj_id, api_url): - ''' - Updates Character from ComicVine API URL. + def _update_character(self, obj_id, api_url): + ''' + Updates Character from ComicVine API URL. - Returns the Character object udpated. - ''' + Returns the Character object udpated. 
+ ''' - # Request and Response - params = self.base_params - params['field_list'] = self.character_fields + # Request and Response + params = self.base_params + params['field_list'] = self.character_fields - response = requests.get( - api_url, - params=params, - headers=self.headers, - ).json() + response = requests.get( + api_url, + params=params, + headers=self.headers, + ).json() - data = self._get_object_data(response['results']) + data = self._get_object_data(response['results']) - # Update Character - Character.objects.filter(id=obj_id).update( - cvurl=data['cvurl'], - name=data['name'], - desc=data['desc'], - image=data['image'], - ) + # Update Character + Character.objects.filter(id=obj_id).update( + cvurl=data['cvurl'], + name=data['name'], + desc=data['desc'], + image=data['image'], + ) - return Character.objects.get(id=obj_id) + return Character.objects.get(id=obj_id) - #================================================================================================== + #========================================================================= - def _update_creator(self, obj_id, api_url): - ''' - Updates Creator from ComicVine API URL. + def _update_creator(self, obj_id, api_url): + ''' + Updates Creator from ComicVine API URL. - Returns the Creator object udpated. - ''' + Returns the Creator object udpated. 
+ ''' - # Request and Response - params = self.base_params - params['field_list'] = self.creator_fields + # Request and Response + params = self.base_params + params['field_list'] = self.creator_fields - response = requests.get( - api_url, - params=params, - headers=self.headers, - ).json() + response = requests.get( + api_url, + params=params, + headers=self.headers, + ).json() - data = self._get_object_data(response['results']) + data = self._get_object_data(response['results']) - # Update Creator - Creator.objects.filter(id=obj_id).update( - cvurl=data['cvurl'], - name=data['name'], - desc=data['desc'], - image=data['image'], - ) + # Update Creator + Creator.objects.filter(id=obj_id).update( + cvurl=data['cvurl'], + name=data['name'], + desc=data['desc'], + image=data['image'], + ) - return Creator.objects.get(id=obj_id) + return Creator.objects.get(id=obj_id) - #================================================================================================== + #========================================================================= - def _update_issue(self, obj_id, api_url, series_id): - ''' - Updates Issue from ComicVine API URL. + def _update_issue(self, obj_id, api_url, series_id): + ''' + Updates Issue from ComicVine API URL. - Returns the Issue object udpated. - ''' + Returns the Issue object udpated. 
+ ''' - # Request and Response - params = self.base_params - params['field_list'] = self.issue_fields + # Request and Response + params = self.base_params + params['field_list'] = self.issue_fields - response = requests.get( - api_url, - params=params, - headers=self.headers, - ).json() + response = requests.get( + api_url, + params=params, + headers=self.headers, + ).json() - data = self._get_object_data(response['results']) + data = self._get_object_data(response['results']) - issue =Issue.objects.get(id=obj_id) - self._reset_issue(issue.id) + issue = Issue.objects.get(id=obj_id) + self._reset_issue(issue.id) - series = Series.objects.get(id=series_id) + series = Series.objects.get(id=series_id) - # Update Issue - Issue.objects.filter(id=obj_id).update( - cvurl=data['cvurl'], - name=data['name'], - desc=data['desc'], - number=data['number'], - date=data['date'], - series=series, - cover=data['image'], - ) + # Update Issue + Issue.objects.filter(id=obj_id).update( + cvurl=data['cvurl'], + name=data['name'], + desc=data['desc'], + number=data['number'], + date=data['date'], + series=series, + cover=data['image'], + ) - return Issue.objects.get(id=obj_id) + return Issue.objects.get(id=obj_id) - #================================================================================================== + #========================================================================= - def _update_publisher(self, obj_id, api_url, series_id): - ''' - Updates Publisher from ComicVine API URL. + def _update_publisher(self, obj_id, api_url, series_id): + ''' + Updates Publisher from ComicVine API URL. - Returns the Publisher object udpated. - ''' + Returns the Publisher object udpated. 
+ ''' - # Request and Response - params = self.base_params - params['field_list'] = self.publisher_fields + # Request and Response + params = self.base_params + params['field_list'] = self.publisher_fields - response = requests.get( - api_url, - params=params, - headers=self.headers, - ).json() + response = requests.get( + api_url, + params=params, + headers=self.headers, + ).json() - data = self._get_object_data(response['results']) + data = self._get_object_data(response['results']) - # Update Publisher - Publisher.objects.filter(id=obj_id).update( - cvurl=data['cvurl'], - name=data['name'], - desc=data['desc'], - logo=data['image'], - ) + # Update Publisher + Publisher.objects.filter(id=obj_id).update( + cvurl=data['cvurl'], + name=data['name'], + desc=data['desc'], + logo=data['image'], + ) - # Add Publisher to Series - series = Series.objects.get(id=series_id) - series.publisher = Publisher.objects.get(id=obj_id) - series.save() + # Add Publisher to Series + series = Series.objects.get(id=series_id) + series.publisher = Publisher.objects.get(id=obj_id) + series.save() - return Publisher.objects.get(id=obj_id) + return Publisher.objects.get(id=obj_id) - #================================================================================================== + #========================================================================= - def _update_team(self, obj_id, api_url): - ''' - Updates Team from ComicVine API URL. + def _update_team(self, obj_id, api_url): + ''' + Updates Team from ComicVine API URL. - Returns the Team object udpated. - ''' + Returns the Team object udpated. 
+ ''' - # Request and Response - params = self.base_params - params['field_list'] = self.team_fields + # Request and Response + params = self.base_params + params['field_list'] = self.team_fields - response = requests.get( - api_url, - params=params, - headers=self.headers, - ).json() + response = requests.get( + api_url, + params=params, + headers=self.headers, + ).json() - data = self._get_object_data(response['results']) + data = self._get_object_data(response['results']) - # Update Team - Team.objects.filter(id=obj_id).update( - cvurl=data['cvurl'], - name=data['name'], - desc=data['desc'], - image=data['image'], - ) + # Update Team + Team.objects.filter(id=obj_id).update( + cvurl=data['cvurl'], + name=data['name'], + desc=data['desc'], + image=data['image'], + ) - return Team.objects.get(id=obj_id) + return Team.objects.get(id=obj_id) - #================================================================================================== + #========================================================================= - def _update_series(self, obj_id, api_url): - ''' - Updates Series from ComicVine API URL. + def _update_series(self, obj_id, api_url): + ''' + Updates Series from ComicVine API URL. - Returns the Series object udpated. - ''' + Returns the Series object udpated. 
+ ''' - # Request and Response - params = self.base_params - params['field_list'] = self.series_fields + # Request and Response + params = self.base_params + params['field_list'] = self.series_fields - response = requests.get( - api_url, - params=params, - headers=self.headers, - ).json() - - data = self._get_object_data(response['results']) - - # Update Series - Series.objects.filter(id=obj_id).update( - cvurl=data['cvurl'], - name=data['name'], - desc=data['desc'], - year=data['year'], - ) + response = requests.get( + api_url, + params=params, + headers=self.headers, + ).json() + + data = self._get_object_data(response['results']) + + # Update Series + Series.objects.filter(id=obj_id).update( + cvurl=data['cvurl'], + name=data['name'], + desc=data['desc'], + year=data['year'], + ) - return Series.objects.get(id=obj_id) + return Series.objects.get(id=obj_id) - #================================================================================================== + #========================================================================= - def _reset_issue(self, obj_id): - ''' - Resets an Issue's fields. + def _reset_issue(self, obj_id): + ''' + Resets an Issue's fields. - Returns the Issue object that was reset. - ''' - issue = Issue.objects.get(id=obj_id) + Returns the Issue object that was reset. 
+ ''' + issue = Issue.objects.get(id=obj_id) - issue.cvurl = '' - issue.name = '' - issue.number = 1 - issue.date = datetime.date.today() - issue.desc = '' - issue.arcs.clear() - issue.characters.clear() - Roles.objects.filter(issue=issue).delete() - issue.teams.clear() - issue.cover = '' + issue.cvurl = '' + issue.name = '' + issue.number = 1 + issue.date = datetime.date.today() + issue.desc = '' + issue.arcs.clear() + issue.characters.clear() + Roles.objects.filter(issue=issue).delete() + issue.teams.clear() + issue.cover = '' - issue.save() + issue.save() - return Issue.objects.get(id=obj_id) + return Issue.objects.get(id=obj_id) From 5248518634d7cd3065677e3f2ae5e2d9039cbd70 Mon Sep 17 00:00:00 2001 From: Brian Pepple Date: Tue, 7 Nov 2017 17:49:47 -0500 Subject: [PATCH 3/3] Change issue models number field from an int to CharField * Added migration to modify existing data to a 3-padded field Charfield (ex. #001) * Modified the comicimporter.py to add the issue number as a 3-padded String. * Added the comicarchive api to use it's IssueString function. 
--- comics/migrations/0005_issue_number.py | 42 ++ comics/models.py | 2 +- comics/utils/comicapi/__init__.py | 0 comics/utils/comicapi/comicarchive.py | 627 +++++++++++++++++++++++ comics/utils/comicapi/comicbookinfo.py | 139 +++++ comics/utils/comicapi/comicinfoxml.py | 280 ++++++++++ comics/utils/comicapi/comicutils.py | 459 +++++++++++++++++ comics/utils/comicapi/filenameparser.py | 283 ++++++++++ comics/utils/comicapi/genericmetadata.py | 294 +++++++++++ comics/utils/comicapi/issuestring.py | 116 +++++ comics/utils/comicimporter.py | 12 +- 11 files changed, 2249 insertions(+), 5 deletions(-) create mode 100644 comics/migrations/0005_issue_number.py create mode 100644 comics/utils/comicapi/__init__.py create mode 100644 comics/utils/comicapi/comicarchive.py create mode 100644 comics/utils/comicapi/comicbookinfo.py create mode 100644 comics/utils/comicapi/comicinfoxml.py create mode 100644 comics/utils/comicapi/comicutils.py create mode 100644 comics/utils/comicapi/filenameparser.py create mode 100644 comics/utils/comicapi/genericmetadata.py create mode 100644 comics/utils/comicapi/issuestring.py diff --git a/comics/migrations/0005_issue_number.py b/comics/migrations/0005_issue_number.py new file mode 100644 index 0000000..efae287 --- /dev/null +++ b/comics/migrations/0005_issue_number.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.10.1 on 2017-11-07 21:42 +from __future__ import unicode_literals + +from django.db import migrations, models + +from ..utils.comicapi.issuestring import IssueString + + +def add_issue_number_data(apps, schema_editor): + + Issue = apps.get_model('comics', 'Issue') + query = Issue.objects.all() + for issue in query: + new_number = IssueString(issue.number).asString(pad=3) + issue.number = new_number + issue.save() + + +def remove_issue_number_data(apps, schema_editor): + + Issue = apps.get_model('comics', 'Issue') + Issue.objects.update(number='') + + +class Migration(migrations.Migration): + + dependencies = [ + 
('comics', '0004_issue_page_count'), + ] + + operations = [ + migrations.AlterField( + model_name='issue', + name='number', + field=models.CharField(max_length=25, verbose_name='Issue number'), + ), + migrations.RunPython( + add_issue_number_data, + reverse_code=remove_issue_number_data + ), + ] diff --git a/comics/models.py b/comics/models.py index ba56d4e..b2ca93a 100644 --- a/comics/models.py +++ b/comics/models.py @@ -155,7 +155,7 @@ class Issue(models.Model): cvurl = models.URLField(max_length=200, blank=True) series = models.ForeignKey(Series, on_delete=models.CASCADE, blank=True) name = models.CharField('Issue name', max_length=200, blank=True) - number = models.PositiveSmallIntegerField('Issue number') + number = models.CharField('Issue number', max_length=25) date = models.DateField('Cover date', blank=True) desc = models.TextField('Description', max_length=500, blank=True) arcs = models.ManyToManyField(Arc, blank=True) diff --git a/comics/utils/comicapi/__init__.py b/comics/utils/comicapi/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/comics/utils/comicapi/comicarchive.py b/comics/utils/comicapi/comicarchive.py new file mode 100644 index 0000000..bb6ff8b --- /dev/null +++ b/comics/utils/comicapi/comicarchive.py @@ -0,0 +1,627 @@ +''' +Copyright 2012-2014 Anthony Beville +Copyright 2017 Brian Pepple +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+''' + +import zipfile +import sys +import tempfile +import os +from io import StringIO + +from natsort import natsorted + +try: + from PIL import Image + pil_available = True +except ImportError: + pil_available = False + +from .comicinfoxml import ComicInfoXML +from .comicbookinfo import ComicBookInfo +from .genericmetadata import GenericMetadata +from .filenameparser import FileNameParser + +class MetaDataStyle: + CBI = 0 + CIX = 1 + name = ['ComicBookLover', 'ComicRack'] + + +class ZipArchiver: + ''' Zip Implementation ''' + + def __init__(self, path): + self.path = path + + def getArchiveComment(self): + zf = zipfile.ZipFile(self.path, "r") + comment = zf.comment + zf.close() + return comment + + def setArchiveComment(self, comment): + try: + zf = zipfile.ZipFile(self.path, "a") + zf.comment = comment + zf.close() + except: + return False + else: + return True + + def readArchiveFile(self, archive_file): + data = "" + zf = zipfile.ZipFile(self.path, "r") + + try: + data = zf.read(archive_file) + except zipfile.BadZipfile as e: + print >> sys.stderr, u"Bad zipfile [{0}]: {1} :: {2}".format(e, self.path, archive_file) + zf.close() + raise IOError + except Exception as e: + zf.close() + print >> sys.stderr, u"Bad zipfile [{0}]: {1} :: {2}".format(e, self.path, archive_file) + raise IOError + finally: + zf.close() + return data + + def removeArchiveFile(self, archive_file): + try: + self.rebuildZipFile([archive_file]) + except: + return False + else: + return True + + def writeArchiveFile(self, archive_file, data): + try: + self.rebuildZipFile([archive_file]) + # Now add the archive file as a new one + zf = zipfile.ZipFile(self.path, mode="a", compression=zipfile.ZIP_DEFLATED) + zf.writestr(archive_file, data) + zf.close() + return True + except: + return False + + def getArchiveFilenameList(self): + try: + zf = zipfile.ZipFile(self.path, "r") + namelist = zf.namelist() + zf.close() + return namelist + except Exception as e: + print >> sys.stderr, u"Unable to get 
zipfile list [{0}]: {1}".format(e, self.path) + return [] + + def rebuildZipFile(self, exclude_list): + """ + Zip helper func + + This recompresses the zip archive, without the files in the exclude_list + """ + tmp_fd, tmp_name = tempfile.mkdtemp(dir=os.path.dirname(self.path)) + os.close(tmp_fd) + + zin = zipfile.ZipFile(self.path, "r") + zout = zipfile.ZipFile(tmp_name, "w") + for item in zin.infolist(): + buffer = zin.read(item.filename) + if (item.filename not in exclude_list): + zout.writestr(item, buffer) + + # Preserve the old comment + zout.comment = zin.comment + + zout.close() + zin.close() + + # Replace with the new file + os.remove(self.path) + os.rename(tmp_name, self.path) + + def copyFromArchive(self, otherArchive): + """ Replace the current zip with one copied from another archive. """ + try: + zout = zipfile.ZipFile(self.path, "w") + for fname in otherArchive.getArchiveFilenameList(): + data = otherArchive.readArchiveFile(fname) + if data is not None: + zout.writestr(fname, data) + zout.close() + + # Preserve the old comment + comment = otherArchive.getArchiveComment() + if comment is not None: + if not self.writeZipComment(self.path, comment): + return False + except Exception as e: + print >> sys.stderr, u"Error while copying to {0}: {1}".format(self.path, e) + return False + else: + return True + + +class FolderArchiver: + + """Folder implementation""" + + def __init__(self, path): + self.path = path + self.comment_file_name = "ComicTaggerFolderComment.txt" + + def getArchiveComment(self): + return self.readArchiveFile(self.comment_file_name) + + def setArchiveComment(self, comment): + return self.writeArchiveFile(self.comment_file_name, comment) + + def readArchiveFile(self, archive_file): + + data = "" + fname = os.path.join(self.path, archive_file) + try: + with open(fname, 'rb') as f: + data = f.read() + f.close() + except IOError: + pass + + return data + + def writeArchiveFile(self, archive_file, data): + + fname = os.path.join(self.path, 
archive_file) + try: + with open(fname, 'w+') as f: + f.write(data) + f.close() + except: + return False + else: + return True + + def removeArchiveFile(self, archive_file): + + fname = os.path.join(self.path, archive_file) + try: + os.remove(fname) + except: + return False + else: + return True + + def getArchiveFilenameList(self): + return self.listFiles(self.path) + + def listFiles(self, folder): + + itemlist = list() + + for item in os.listdir(folder): + itemlist.append(item) + if os.path.isdir(item): + itemlist.extend(self.listFiles(os.path.join(folder, item))) + + return itemlist + + +class UnknownArchiver: + + """Unknown implementation""" + + def __init__(self, path): + self.path = path + + def getArchiveComment(self): + return "" + + def setArchiveComment(self, comment): + return False + + def readArchiveFile(self): + return "" + + def writeArchiveFile(self, archive_file, data): + return False + + def removeArchiveFile(self, archive_file): + return False + + def getArchiveFilenameList(self): + return [] + + +class ComicArchive: + logo_data = None + + class ArchiveType: + Zip, Folder, Unknown = range(3) + + def __init__(self, path, default_image_path=None): + self.path = path + self.ci_xml_filename = 'ComicInfo.xml' + self.resetCache() + # Use file extension to decide which archive test we do first + ext = os.path.splitext(path)[1].lower() + self.archive_type = self.ArchiveType.Unknown + self.archiver = UnknownArchiver(self.path) + self.default_image_path = default_image_path + + if ext == ".cbz" or ext == ".zip": + if self.zipTest(): + self.archive_type = self.ArchiveType.Zip + self.archiver = ZipArchiver(self.path) + + def resetCache(self): + """ Clears the cached data """ + self.has_cix = None + self.has_cbi = None + self.page_count = None + self.page_list = None + self.cix_md = None + + def loadCache(self, style_list): + for style in style_list: + self.readMetadata(style) + + def rename(self, path): + self.path = path + self.archiver.path = path + + def 
zipTest(self): + return zipfile.is_zipfile(self.path) + + def isZip(self): + return self.archive_type == self.ArchiveType.Zip + + def isFolder(self): + return self.archive_type == self.ArchiveType.Folder + + def isWritable(self): + if self.archive_type == self.ArchiveType.Unknown: + return False + elif not os.access(self.path, os.W_OK): + return False + elif ((self.archive_type != self.ArchiveType.Folder) and + (not os.access(os.path.dirname(os.path.abspath(self.path)), os.W_OK))): + return False + return True + + def isWritableForStyle(self, data_style): + if data_style == MetaDataStyle.CBI: + return False + + return self.isWritable() + + def seemsToBeAComicArchive(self): + if (self.isZip() and (self.getNumberOfPages() > 0)): + return True + else: + return False + + def readMetadata(self, style): + if style == MetaDataStyle.CIX: + return self.readCIX() + elif style == MetaDataStyle.CBI: + return self.readCBI() + else: + return GenericMetadata() + + def writeMetadata(self, metadata, style): + retcode = None + if style == MetaDataStyle.CIX: + retcode = self.writeCIX(metadata) + elif style == MetaDataStyle.CBI: + retcode = self.writeCBI(metadata) + return retcode + + def hasMetadata(self, style): + if style == MetaDataStyle.CIX: + return self.hasCIX() + elif style == MetaDataStyle.CBI: + return self.hasCBI() + else: + return False + + def removeMetadata(self, style): + retcode = True + if style == MetaDataStyle.CIX: + retcode = self.removeCIX() + elif style == MetaDataStyle.CBI: + retcode = self.removeCBI() + return retcode + + def getPage(self, index): + image_data = None + filename = self.getPageName(index) + + if filename is not None: + try: + image_data = self.archiver.readArchiveFile(filename) + except IOError: + print >> sys.stderr, u"Error reading in page. Substituting logo page." 
+ image_data = ComicArchive.logo_data + + return image_data + + def getPageName(self, index): + if index is None: + return None + + page_list = self.getPageNameList() + + num_pages = len(page_list) + if num_pages == 0 or index >= num_pages: + return None + + return page_list[index] + + def getScannerPageIndex(self): + scanner_page_index = None + + # make a guess at the scanner page + name_list = self.getPageNameList() + count = self.getNumberOfPages() + + # too few pages to really know + if count < 5: + return None + + # count the length of every filename, and count occurences + length_buckets = dict() + for name in name_list: + fname = os.path.split(name)[1] + length = len(fname) + if length in length_buckets: + length_buckets[length] += 1 + else: + length_buckets[length] = 1 + + # sort by most common + sorted_buckets = sorted( + length_buckets.iteritems(), + key=lambda k_v: ( + k_v[1], + k_v[0]), + reverse=True) + + # statistical mode occurence is first + mode_length = sorted_buckets[0][0] + + # we are only going to consider the final image file: + final_name = os.path.split(name_list[count - 1])[1] + + common_length_list = list() + for name in name_list: + if len(os.path.split(name)[1]) == mode_length: + common_length_list.append(os.path.split(name)[1]) + + prefix = os.path.commonprefix(common_length_list) + + if mode_length <= 7 and prefix == "": + # probably all numbers + if len(final_name) > mode_length: + scanner_page_index = count - 1 + + # see if the last page doesn't start with the same prefix as most + # others + elif not final_name.startswith(prefix): + scanner_page_index = count - 1 + + return scanner_page_index + + def getPageNameList(self, sort_list=True): + if self.page_list is None: + # get the list file names in the archive, and sort + files = self.archiver.getArchiveFilenameList() + + # seems like some archive creators are on Windows, and don't know + # about case-sensitivity! 
+ if sort_list: + def keyfunc(k): + return k.lower() + files = natsorted(files, key=keyfunc, signed=False) + + # make a sub-list of image files + self.page_list = [] + for name in files: + if (name[-4:].lower() in [".jpg", + "jpeg", + ".png", + ".gif", + "webp"] and os.path.basename(name)[0] != "."): + self.page_list.append(name) + + return self.page_list + + def getNumberOfPages(self): + + if self.page_count is None: + self.page_count = len(self.getPageNameList()) + return self.page_count + + def readCBI(self): + if self.cbi_md is None: + raw_cbi = self.readRawCBI() + if raw_cbi is None: + self.cbi_md = GenericMetadata() + else: + self.cbi_md = ComicBookInfo().metadataFromString(raw_cbi) + + self.cbi_md.setDefaultPageList(self.getNumberOfPages()) + + return self.cbi_md + + def readRawCBI(self): + if (not self.hasCBI()): + return None + + return self.archiver.getArchiveComment() + + def hasCBI(self): + if self.has_cbi is None: + if not self.seemsToBeAComicArchive(): + self.has_cbi = False + else: + comment = self.archiver.getArchiveComment() + self.has_cbi = ComicBookInfo().validateString(comment) + + return self.has_cbi + + def writeCBI(self, metadata): + if metadata is not None: + self.applyArchiveInfoToMetadata(metadata) + cbi_string = ComicBookInfo().stringFromMetadata(metadata) + write_success = self.archiver.setArchiveComment(cbi_string) + if write_success: + self.has_cbi = True + self.cbi_md = metadata + self.resetCache() + return write_success + else: + return False + + def removeCBI(self): + if self.hasCBI(): + write_success = self.archiver.setArchiveComment("") + if write_success: + self.has_cbi = False + self.cbi_md = None + self.resetCache() + return write_success + return True + + def readCIX(self): + if self.cix_md is None: + raw_cix = self.readRawCIX() + if raw_cix is None or raw_cix == "": + self.cix_md = GenericMetadata() + else: + self.cix_md = ComicInfoXML().metadataFromString(raw_cix) + + # validate the existing page list (make sure count is 
correct) + if len(self.cix_md.pages) != 0: + if len(self.cix_md.pages) != self.getNumberOfPages(): + # pages array doesn't match the actual number of images we're seeing + # in the archive, so discard the data + self.cix_md.pages = [] + + if len(self.cix_md.pages) == 0: + self.cix_md.setDefaultPageList(self.getNumberOfPages()) + + return self.cix_md + + def readRawCIX(self): + if not self.hasCIX(): + return None + try: + raw_cix = self.archiver.readArchiveFile(self.ci_xml_filename) + except IOError: + raw_cix = "" + return raw_cix + + def writeCIX(self, metadata): + if metadata is not None: + self.applyArchiveInfoToMetadata(metadata, calc_page_sizes=True) + cix_string = ComicInfoXML().stringFromMetadata(metadata) + write_success = self.archiver.writeArchiveFile( + self.ci_xml_filename, + cix_string) + if write_success: + self.has_cix = True + self.cix_md = metadata + self.resetCache() + return write_success + else: + return False + + def removeCIX(self): + if self.hasCIX(): + write_success = self.archiver.removeArchiveFile( + self.ci_xml_filename) + if write_success: + self.has_cix = False + self.cix_md = None + self.resetCache() + return write_success + return True + + def hasCIX(self): + if self.has_cix is None: + if not self.seemsToBeAComicArchive(): + self.has_cix = False + elif self.ci_xml_filename in self.archiver.getArchiveFilenameList(): + self.has_cix = True + else: + self.has_cix = False + return self.has_cix + + def applyArchiveInfoToMetadata(self, md, calc_page_sizes=False): + md.pageCount = self.getNumberOfPages() + + if calc_page_sizes: + for p in md.pages: + idx = int(p['Image']) + if pil_available: + if 'ImageSize' not in p or 'ImageHeight' not in p or 'ImageWidth' not in p: + data = self.getPage(idx) + if data is not None: + try: + im = Image.open(StringIO.StringIO(data)) + w, h = im.size + + p['ImageSize'] = str(len(data)) + p['ImageHeight'] = str(h) + p['ImageWidth'] = str(w) + except IOError: + p['ImageSize'] = str(len(data)) + + else: + if 
'ImageSize' not in p: + data = self.getPage(idx) + p['ImageSize'] = str(len(data)) + + def metadataFromFilename(self, parse_scan_info=True): + metadata = GenericMetadata() + + fnp = FileNameParser() + fnp.parseFilename(self.path) + + if fnp.issue != "": + metadata.issue = fnp.issue + if fnp.series != "": + metadata.series = fnp.series + if fnp.volume != "": + metadata.volume = fnp.volume + if fnp.year != "": + metadata.year = fnp.year + if fnp.issue_count != "": + metadata.issueCount = fnp.issue_count + if parse_scan_info: + if fnp.remainder != "": + metadata.scanInfo = fnp.remainder + + metadata.isEmpty = False + + return metadata + + def exportAsZip(self, zipfilename): + if self.archive_type == self.ArchiveType.Zip: + # nothing to do, we're already a zip + return True + + zip_archiver = ZipArchiver(zipfilename) + return zip_archiver.copyFromArchive(self.archiver) diff --git a/comics/utils/comicapi/comicbookinfo.py b/comics/utils/comicapi/comicbookinfo.py new file mode 100644 index 0000000..3c0bbd7 --- /dev/null +++ b/comics/utils/comicapi/comicbookinfo.py @@ -0,0 +1,139 @@ +''' +Copyright 2012-2014 Anthony Beville +Copyright 2017 Brian Pepple +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +''' + +import json +from datetime import datetime + +from .genericmetadata import GenericMetadata +from . 
import comicutils as utils + +class ComicBookInfo: + + def metadataFromString(self, string): + + cbi_container = json.loads(str(string, 'utf-8')) + + metadata = GenericMetadata() + + cbi = cbi_container['ComicBookInfo/1.0'] + + # helper func + # If item is not in CBI, return None + def xlate(cbi_entry): + if cbi_entry in cbi: + return cbi[cbi_entry] + else: + return None + + metadata.series = xlate('series') + metadata.title = xlate('title') + metadata.issue = xlate('issue') + metadata.publisher = xlate('publisher') + metadata.month = xlate('publicationMonth') + metadata.year = xlate('publicationYear') + metadata.issueCount = xlate('numberOfIssues') + metadata.comments = xlate('comments') + metadata.credits = xlate('credits') + metadata.genre = xlate('genre') + metadata.volume = xlate('volume') + metadata.volumeCount = xlate('numberOfVolumes') + metadata.language = xlate('language') + metadata.country = xlate('country') + metadata.criticalRating = xlate('rating') + metadata.tags = xlate('tags') + + # make sure credits and tags are at least empty lists and not None + if metadata.credits is None: + metadata.credits = [] + if metadata.tags is None: + metadata.tags = [] + + # need to massage the language string to be ISO + if metadata.language is not None: + # reverse look-up + pattern = metadata.language + metadata.language = None + for key in utils.getLanguageDict(): + if utils.getLanguageDict()[key] == pattern.encode('utf-8'): + metadata.language = key + break + + metadata.isEmpty = False + + return metadata + + def stringFromMetadata(self, metadata): + + cbi_container = self.createJSONDictionary(metadata) + return json.dumps(cbi_container) + + def validateString(self, string): + """Verify that the string actually contains CBI data in JSON format""" + + try: + cbi_container = json.loads(string) + except: + return False + + return ('ComicBookInfo/1.0' in cbi_container) + + def createJSONDictionary(self, metadata): + """Create the dictionary that we will convert to 
JSON text""" + + cbi = dict() + cbi_container = {'appID': 'ComicTagger/' + '1.0.0', # ctversion.version, + 'lastModified': str(datetime.now()), + 'ComicBookInfo/1.0': cbi} + + # helper func + def assign(cbi_entry, md_entry): + if md_entry is not None: + cbi[cbi_entry] = md_entry + + # helper func + def toInt(s): + i = None + if type(s) in [str, int]: + try: + i = int(s) + except ValueError: + pass + return i + + assign('series', metadata.series) + assign('title', metadata.title) + assign('issue', metadata.issue) + assign('publisher', metadata.publisher) + assign('publicationMonth', toInt(metadata.month)) + assign('publicationYear', toInt(metadata.year)) + assign('numberOfIssues', toInt(metadata.issueCount)) + assign('comments', metadata.comments) + assign('genre', metadata.genre) + assign('volume', toInt(metadata.volume)) + assign('numberOfVolumes', toInt(metadata.volumeCount)) + assign('language', utils.getLanguageFromISO(metadata.language)) + assign('country', metadata.country) + assign('rating', metadata.criticalRating) + assign('credits', metadata.credits) + assign('tags', metadata.tags) + + return cbi_container + + def writeToExternalFile(self, filename, metadata): + + cbi_container = self.createJSONDictionary(metadata) + + f = open(filename, 'w') + f.write(json.dumps(cbi_container, indent=4)) + f.close diff --git a/comics/utils/comicapi/comicinfoxml.py b/comics/utils/comicapi/comicinfoxml.py new file mode 100644 index 0000000..57582b7 --- /dev/null +++ b/comics/utils/comicapi/comicinfoxml.py @@ -0,0 +1,280 @@ +''' +Copyright 2012-2014 Anthony Beville +Copyright 2017 Brian Pepple +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +''' + +import xml.etree.ElementTree as ET + +from .genericmetadata import GenericMetadata +from . import comicutils as utils + +class ComicInfoXML(object): + writer_synonyms = ['writer', 'plotter', 'scripter'] + penciller_synonyms = ['artist', 'penciller', 'penciler', 'breakdowns'] + inker_synonyms = ['inker', 'artist', 'finishes'] + colorist_synonyms = ['colorist', 'colourist', 'colorer', 'colourer'] + letterer_synonyms = ['letterer'] + cover_synonyms = ['cover', 'covers', 'coverartist', 'cover artist'] + editor_synonyms = ['editor'] + + def getParseableCredits(self): + parseable_credits = [] + parseable_credits.extend(self.writer_synonyms) + parseable_credits.extend(self.penciller_synonyms) + parseable_credits.extend(self.inker_synonyms) + parseable_credits.extend(self.colorist_synonyms) + parseable_credits.extend(self.letterer_synonyms) + parseable_credits.extend(self.colorist_synonyms) + parseable_credits.extend(self.editor_synonyms) + + return parseable_credits + + def metadataFromString(self, s): + tree = ET.ElementTree(ET.fromstring(s)) + + return self.convertXMLToMetadata(tree) + + def stringFromMetadata(self, metad): + header = '\n' + tree = self.convertMetadataToXML(self, metad) + + return header + ET.tostring(tree.getroot()) + + def indent(self, elem, level=0): + # For making the XML output readable + i = "\n" + level * " " + if len(elem): + if not elem.text or not elem.text.strip(): + elem.text = i + " " + if not elem.tail or not elem.tail.strip(): + elem.tail = i + for elem in elem: + self.indent(elem, level + 1) + if not elem.tail or not elem.tail.strip(): + 
elem.tail = i + else: + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = i + + def convertMetadataToXML(self, filename, metadata): + # Shorthand for metadata + md = metadata + # Build a tree structure + root = ET.Element("ComicInfo") + root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance" + root.attrib['xmlns:xsd'] = "http://www.w3.org/2001/XMLSchema" + # helper func + def assign(cix_entry, md_entry): + if md_entry is not None: + ET.SubElement(root, cix_entry).text = u"{0}".format(md_entry) + + assign('Title', md.title) + assign('Series', md.series) + assign('Number', md.issue) + assign('Count', md.issueCount) + assign('Volume', md.volume) + assign('AlternateSeries', md.alternateSeries) + assign('AlternateNumber', md.alternateNumber) + assign('StoryArc', md.storyArc) + assign('SeriesGroup', md.seriesGroup) + assign('AlternateCount', md.alternateCount) + assign('Summary', md.comments) + assign('Notes', md.notes) + assign('Year', md.year) + assign('Month', md.month) + assign('Day', md.day) + + # need to specially process the credits, since they are structured + # differently than CIX + credit_writer_list = list() + credit_penciller_list = list() + credit_inker_list = list() + credit_colorist_list = list() + credit_letterer_list = list() + credit_cover_list = list() + credit_editor_list = list() + + # first, loop thru credits, and build a list for each role that CIX + # supports + for credit in metadata.credits: + + if credit['role'].lower() in set(self.writer_synonyms): + credit_writer_list.append(credit['person'].replace(",", "")) + + if credit['role'].lower() in set(self.penciller_synonyms): + credit_penciller_list.append(credit['person'].replace(",", "")) + + if credit['role'].lower() in set(self.inker_synonyms): + credit_inker_list.append(credit['person'].replace(",", "")) + + if credit['role'].lower() in set(self.colorist_synonyms): + credit_colorist_list.append(credit['person'].replace(",", "")) + + if credit['role'].lower() 
in set(self.letterer_synonyms): + credit_letterer_list.append(credit['person'].replace(",", "")) + + if credit['role'].lower() in set(self.cover_synonyms): + credit_cover_list.append(credit['person'].replace(",", "")) + + if credit['role'].lower() in set(self.editor_synonyms): + credit_editor_list.append(credit['person'].replace(",", "")) + + # second, convert each list to string, and add to XML struct + if len(credit_writer_list) > 0: + node = ET.SubElement(root, 'Writer') + node.text = utils.listToString(credit_writer_list) + + if len(credit_penciller_list) > 0: + node = ET.SubElement(root, 'Penciller') + node.text = utils.listToString(credit_penciller_list) + + if len(credit_inker_list) > 0: + node = ET.SubElement(root, 'Inker') + node.text = utils.listToString(credit_inker_list) + + if len(credit_colorist_list) > 0: + node = ET.SubElement(root, 'Colorist') + node.text = utils.listToString(credit_colorist_list) + + if len(credit_letterer_list) > 0: + node = ET.SubElement(root, 'Letterer') + node.text = utils.listToString(credit_letterer_list) + + if len(credit_cover_list) > 0: + node = ET.SubElement(root, 'CoverArtist') + node.text = utils.listToString(credit_cover_list) + + if len(credit_editor_list) > 0: + node = ET.SubElement(root, 'Editor') + node.text = utils.listToString(credit_editor_list) + + assign('Publisher', md.publisher) + assign('Imprint', md.imprint) + assign('Genre', md.genre) + assign('Web', md.webLink) + assign('PageCount', md.pageCount) + assign('LanguageISO', md.language) + assign('Format', md.format) + assign('AgeRating', md.maturityRating) + if md.blackAndWhite is not None and md.blackAndWhite: + ET.SubElement(root, 'BlackAndWhite').text = "Yes" + assign('Manga', md.manga) + assign('Characters', md.characters) + assign('Teams', md.teams) + assign('Locations', md.locations) + assign('ScanInformation', md.scanInfo) + + # loop and add the page entries under pages node + if len(md.pages) > 0: + pages_node = ET.SubElement(root, 'Pages') + for 
page_dict in md.pages: + page_node = ET.SubElement(pages_node, 'Page') + page_node.attrib = page_dict + + # self pretty-print + self.indent(root) + + # wrap it in an ElementTree instance, and save as XML + tree = ET.ElementTree(root) + return tree + + def convertXMLToMetadata(self, tree): + + root = tree.getroot() + + if root.tag != 'ComicInfo': + raise 1 + return None + + metadata = GenericMetadata() + md = metadata + + # Helper function + def xlate(tag): + node = root.find(tag) + if node is not None: + return node.text + else: + return None + + md.series = xlate('Series') + md.title = xlate('Title') + md.issue = xlate('Number') + md.issueCount = xlate('Count') + md.volume = xlate('Volume') + md.alternateSeries = xlate('AlternateSeries') + md.alternateNumber = xlate('AlternateNumber') + md.alternateCount = xlate('AlternateCount') + md.comments = xlate('Summary') + md.notes = xlate('Notes') + md.year = xlate('Year') + md.month = xlate('Month') + md.day = xlate('Day') + md.publisher = xlate('Publisher') + md.imprint = xlate('Imprint') + md.genre = xlate('Genre') + md.webLink = xlate('Web') + md.language = xlate('LanguageISO') + md.format = xlate('Format') + md.manga = xlate('Manga') + md.characters = xlate('Characters') + md.teams = xlate('Teams') + md.locations = xlate('Locations') + md.pageCount = xlate('PageCount') + md.scanInfo = xlate('ScanInformation') + md.storyArc = xlate('StoryArc') + md.seriesGroup = xlate('SeriesGroup') + md.maturityRating = xlate('AgeRating') + + tmp = xlate('BlackAndWhite') + md.blackAndWhite = False + if tmp is not None and tmp.lower() in ["yes", "true", "1"]: + md.blackAndWhite = True + # Now extract the credit info + for n in root: + if (n.tag == 'Writer' or + n.tag == 'Penciller' or + n.tag == 'Inker' or + n.tag == 'Colorist' or + n.tag == 'Letterer' or + n.tag == 'Editor' + ): + if n.text is not None: + for name in n.text.split(','): + metadata.addCredit(name.strip(), n.tag) + + if n.tag == 'CoverArtist': + if n.text is not None: + 
for name in n.text.split(','): + metadata.addCredit(name.strip(), "Cover") + + # parse page data now + pages_node = root.find("Pages") + if pages_node is not None: + for page in pages_node: + metadata.pages.append(page.attrib) + # print page.attrib + + metadata.isEmpty = False + + return metadata + + def writeToExternalFile(self, filename, metadata): + + tree = self.convertMetadataToXML(self, metadata) + # ET.dump(tree) + tree.write(filename, encoding='utf-8') + + def readFromExternalFile(self, filename): + + tree = ET.parse(filename) + return self.convertXMLToMetadata(tree) diff --git a/comics/utils/comicapi/comicutils.py b/comics/utils/comicapi/comicutils.py new file mode 100644 index 0000000..3cf59ab --- /dev/null +++ b/comics/utils/comicapi/comicutils.py @@ -0,0 +1,459 @@ +''' +Copyright 2012-2014 Anthony Beville +Copyright 2017 Brian Pepple +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
'''
Copyright 2012-2014 Anthony Beville
Copyright 2017 Brian Pepple

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''


def listToString(l):
    """Join a list of strings into a comma-separated string.

    Returns "" for None or an empty list; a single item is returned
    unchanged (no separator), matching the original behavior.
    """
    # Originally built with repeated ``+=`` (quadratic); str.join is the
    # idiomatic equivalent and produces identical output.
    if l is None:
        return ""
    return ", ".join(l)


# -o- coding: utf-8 -o-
# ISO639 python dict
# official list in http://www.loc.gov/standards/iso639-2/php/code_list.php

lang_dict = {
    'ab': 'Abkhaz',
    'aa': 'Afar',
    'af': 'Afrikaans',
    'ak': 'Akan',
    'sq': 'Albanian',
    'am': 'Amharic',
    'ar': 'Arabic',
    'an': 'Aragonese',
    'hy': 'Armenian',
    'as': 'Assamese',
    'av': 'Avaric',
    'ae': 'Avestan',
    'ay': 'Aymara',
    'az': 'Azerbaijani',
    'bm': 'Bambara',
    'ba': 'Bashkir',
    'eu': 'Basque',
    'be': 'Belarusian',
    'bn': 'Bengali',
    'bh': 'Bihari',
    'bi': 'Bislama',
    'bs': 'Bosnian',
    'br': 'Breton',
    'bg': 'Bulgarian',
    'my': 'Burmese',
    'ca': 'Catalan; Valencian',
    'ch': 'Chamorro',
    'ce': 'Chechen',
    'ny': 'Chichewa; Chewa; Nyanja',
    'zh': 'Chinese',
    'cv': 'Chuvash',
    'kw': 'Cornish',
    'co': 'Corsican',
    'cr': 'Cree',
    'hr': 'Croatian',
    'cs': 'Czech',
    'da': 'Danish',
    'dv': 'Divehi; Maldivian;',
    'nl': 'Dutch',
    'dz': 'Dzongkha',
    'en': 'English',
    'eo': 'Esperanto',
    'et': 'Estonian',
    'ee': 'Ewe',
    'fo': 'Faroese',
    'fj': 'Fijian',
    'fi': 'Finnish',
    'fr': 'French',
    'ff': 'Fula',
    'gl': 'Galician',
    'ka': 'Georgian',
    'de': 'German',
    'el': 'Greek, Modern',
    'gn': 'Guaraní',
    'gu': 'Gujarati',
    'ht': 'Haitian',
    'ha': 'Hausa',
    'he': 'Hebrew (modern)',
    'hz': 'Herero',
    'hi': 'Hindi',
    'ho': 'Hiri Motu',
    'hu': 'Hungarian',
    'ia': 'Interlingua',
    'id': 'Indonesian',
    'ie': 'Interlingue',
    'ga': 'Irish',
    'ig': 'Igbo',
    'ik': 'Inupiaq',
    'io': 'Ido',
    'is': 'Icelandic',
    'it': 'Italian',
    'iu': 'Inuktitut',
    'ja': 'Japanese',
    'jv': 'Javanese',
    'kl': 'Kalaallisut',
    'kn': 'Kannada',
    'kr': 'Kanuri',
    'ks': 'Kashmiri',
    'kk': 'Kazakh',
    'km': 'Khmer',
    'ki': 'Kikuyu, Gikuyu',
    'rw': 'Kinyarwanda',
    'ky': 'Kirghiz, Kyrgyz',
    'kv': 'Komi',
    'kg': 'Kongo',
    'ko': 'Korean',
    'ku': 'Kurdish',
    'kj': 'Kwanyama, Kuanyama',
    'la': 'Latin',
    'lb': 'Luxembourgish',
    'lg': 'Luganda',
    'li': 'Limburgish',
    'ln': 'Lingala',
    'lo': 'Lao',
    'lt': 'Lithuanian',
    'lu': 'Luba-Katanga',
    'lv': 'Latvian',
    'gv': 'Manx',
    'mk': 'Macedonian',
    'mg': 'Malagasy',
    'ms': 'Malay',
    'ml': 'Malayalam',
    'mt': 'Maltese',
    'mi': 'Māori',
    'mr': 'Marathi (Marāṭhī)',
    'mh': 'Marshallese',
    'mn': 'Mongolian',
    'na': 'Nauru',
    'nv': 'Navajo, Navaho',
    'nb': 'Norwegian Bokmål',
    'nd': 'North Ndebele',
    'ne': 'Nepali',
    'ng': 'Ndonga',
    'nn': 'Norwegian Nynorsk',
    'no': 'Norwegian',
    'ii': 'Nuosu',
    'nr': 'South Ndebele',
    'oc': 'Occitan',
    'oj': 'Ojibwe, Ojibwa',
    'cu': 'Old Church Slavonic',
    'om': 'Oromo',
    'or': 'Oriya',
    'os': 'Ossetian, Ossetic',
    'pa': 'Panjabi, Punjabi',
    'pi': 'Pāli',
    'fa': 'Persian',
    'pl': 'Polish',
    'ps': 'Pashto, Pushto',
    'pt': 'Portuguese',
    'qu': 'Quechua',
    'rm': 'Romansh',
    'rn': 'Kirundi',
    'ro': 'Romanian, Moldavan',
    'ru': 'Russian',
    'sa': 'Sanskrit (Saṁskṛta)',
    'sc': 'Sardinian',
    'sd': 'Sindhi',
    'se': 'Northern Sami',
    'sm': 'Samoan',
    'sg': 'Sango',
    'sr': 'Serbian',
    'gd': 'Scottish Gaelic',
    'sn': 'Shona',
    'si': 'Sinhala, Sinhalese',
    'sk': 'Slovak',
    'sl': 'Slovene',
    'so': 'Somali',
    'st': 'Southern Sotho',
    'es': 'Spanish; Castilian',
    'su': 'Sundanese',
    'sw': 'Swahili',
    'ss': 'Swati',
    'sv': 'Swedish',
    'ta': 'Tamil',
    'te': 'Telugu',
    'tg': 'Tajik',
    'th': 'Thai',
    'ti': 'Tigrinya',
    'bo': 'Tibetan',
    'tk': 'Turkmen',
    'tl': 'Tagalog',
    'tn': 'Tswana',
    'to': 'Tonga',
    'tr': 'Turkish',
    'ts': 'Tsonga',
    'tt': 'Tatar',
    'tw': 'Twi',
    'ty': 'Tahitian',
    'ug': 'Uighur, Uyghur',
    'uk': 'Ukrainian',
    'ur': 'Urdu',
    'uz': 'Uzbek',
    've': 'Venda',
    'vi': 'Vietnamese',
    'vo': 'Volapük',
    'wa': 'Walloon',
    'cy': 'Welsh',
    'wo': 'Wolof',
    'fy': 'Western Frisian',
    'xh': 'Xhosa',
    'yi': 'Yiddish',
    'yo': 'Yoruba',
    'za': 'Zhuang, Chuang',
    'zu': 'Zulu',
}


# (code, name) country pairs
countries = [
    ('AF', 'Afghanistan'),
    ('AL', 'Albania'),
    ('DZ', 'Algeria'),
    ('AS', 'American Samoa'),
    ('AD', 'Andorra'),
    ('AO', 'Angola'),
    ('AI', 'Anguilla'),
    ('AQ', 'Antarctica'),
    ('AG', 'Antigua And Barbuda'),
    ('AR', 'Argentina'),
    ('AM', 'Armenia'),
    ('AW', 'Aruba'),
    ('AU', 'Australia'),
    ('AT', 'Austria'),
    ('AZ', 'Azerbaijan'),
    ('BS', 'Bahamas'),
    ('BH', 'Bahrain'),
    ('BD', 'Bangladesh'),
    ('BB', 'Barbados'),
    ('BY', 'Belarus'),
    ('BE', 'Belgium'),
    ('BZ', 'Belize'),
    ('BJ', 'Benin'),
    ('BM', 'Bermuda'),
    ('BT', 'Bhutan'),
    ('BO', 'Bolivia'),
    ('BA', 'Bosnia And Herzegowina'),
    ('BW', 'Botswana'),
    ('BV', 'Bouvet Island'),
    ('BR', 'Brazil'),
    ('BN', 'Brunei Darussalam'),
    ('BG', 'Bulgaria'),
    ('BF', 'Burkina Faso'),
    ('BI', 'Burundi'),
    ('KH', 'Cambodia'),
    ('CM', 'Cameroon'),
    ('CA', 'Canada'),
    ('CV', 'Cape Verde'),
    ('KY', 'Cayman Islands'),
    ('CF', 'Central African Rep'),
    ('TD', 'Chad'),
    ('CL', 'Chile'),
    ('CN', 'China'),
    ('CX', 'Christmas Island'),
    ('CC', 'Cocos Islands'),
    ('CO', 'Colombia'),
    ('KM', 'Comoros'),
    ('CG', 'Congo'),
    ('CK', 'Cook Islands'),
    ('CR', 'Costa Rica'),
    ('CI', 'Cote D`ivoire'),
    ('HR', 'Croatia'),
    ('CU', 'Cuba'),
    ('CY', 'Cyprus'),
    ('CZ', 'Czech Republic'),
    ('DK', 'Denmark'),
    ('DJ', 'Djibouti'),
    ('DM', 'Dominica'),
    ('DO', 'Dominican Republic'),
    ('TP', 'East Timor'),
    ('EC', 'Ecuador'),
    ('EG', 'Egypt'),
    ('SV', 'El Salvador'),
    ('GQ', 'Equatorial Guinea'),
    ('ER', 'Eritrea'),
    ('EE', 'Estonia'),
    ('ET', 'Ethiopia'),
    ('FK', 'Falkland Islands (Malvinas)'),
    ('FO', 'Faroe Islands'),
    ('FJ', 'Fiji'),
    ('FI', 'Finland'),
    ('FR', 'France'),
    ('GF', 'French Guiana'),
    ('PF', 'French Polynesia'),
    ('TF', 'French S. Territories'),
    ('GA', 'Gabon'),
    ('GM', 'Gambia'),
    ('GE', 'Georgia'),
    ('DE', 'Germany'),
    ('GH', 'Ghana'),
    ('GI', 'Gibraltar'),
    ('GR', 'Greece'),
    ('GL', 'Greenland'),
    ('GD', 'Grenada'),
    ('GP', 'Guadeloupe'),
    ('GU', 'Guam'),
    ('GT', 'Guatemala'),
    ('GN', 'Guinea'),
    ('GW', 'Guinea-bissau'),
    ('GY', 'Guyana'),
    ('HT', 'Haiti'),
    ('HN', 'Honduras'),
    ('HK', 'Hong Kong'),
    ('HU', 'Hungary'),
    ('IS', 'Iceland'),
    ('IN', 'India'),
    ('ID', 'Indonesia'),
    ('IR', 'Iran'),
    ('IQ', 'Iraq'),
    ('IE', 'Ireland'),
    ('IL', 'Israel'),
    ('IT', 'Italy'),
    ('JM', 'Jamaica'),
    ('JP', 'Japan'),
    ('JO', 'Jordan'),
    ('KZ', 'Kazakhstan'),
    ('KE', 'Kenya'),
    ('KI', 'Kiribati'),
    ('KP', 'Korea (North)'),
    ('KR', 'Korea (South)'),
    ('KW', 'Kuwait'),
    ('KG', 'Kyrgyzstan'),
    ('LA', 'Laos'),
    ('LV', 'Latvia'),
    ('LB', 'Lebanon'),
    ('LS', 'Lesotho'),
    ('LR', 'Liberia'),
    ('LY', 'Libya'),
    ('LI', 'Liechtenstein'),
    ('LT', 'Lithuania'),
    ('LU', 'Luxembourg'),
    ('MO', 'Macau'),
    ('MK', 'Macedonia'),
    ('MG', 'Madagascar'),
    ('MW', 'Malawi'),
    ('MY', 'Malaysia'),
    ('MV', 'Maldives'),
    ('ML', 'Mali'),
    ('MT', 'Malta'),
    ('MH', 'Marshall Islands'),
    ('MQ', 'Martinique'),
    ('MR', 'Mauritania'),
    ('MU', 'Mauritius'),
    ('YT', 'Mayotte'),
    ('MX', 'Mexico'),
    ('FM', 'Micronesia'),
    ('MD', 'Moldova'),
    ('MC', 'Monaco'),
    ('MN', 'Mongolia'),
    ('MS', 'Montserrat'),
    ('MA', 'Morocco'),
    ('MZ', 'Mozambique'),
    ('MM', 'Myanmar'),
    ('NA', 'Namibia'),
    ('NR', 'Nauru'),
    ('NP', 'Nepal'),
    ('NL', 'Netherlands'),
    ('AN', 'Netherlands Antilles'),
    ('NC', 'New Caledonia'),
    ('NZ', 'New Zealand'),
    ('NI', 'Nicaragua'),
    ('NE', 'Niger'),
    ('NG', 'Nigeria'),
    ('NU', 'Niue'),
    ('NF', 'Norfolk Island'),
    ('MP', 'Northern Mariana Islands'),
    ('NO', 'Norway'),
    ('OM', 'Oman'),
    ('PK', 'Pakistan'),
    ('PW', 'Palau'),
    ('PA', 'Panama'),
    ('PG', 'Papua New Guinea'),
    ('PY', 'Paraguay'),
    ('PE', 'Peru'),
    ('PH', 'Philippines'),
    ('PN', 'Pitcairn'),
    ('PL', 'Poland'),
    ('PT', 'Portugal'),
    ('PR', 'Puerto Rico'),
    ('QA', 'Qatar'),
    ('RE', 'Reunion'),
    ('RO', 'Romania'),
    ('RU', 'Russian Federation'),
    ('RW', 'Rwanda'),
    ('KN', 'Saint Kitts And Nevis'),
    ('LC', 'Saint Lucia'),
    ('VC', 'St Vincent/Grenadines'),
    ('WS', 'Samoa'),
    ('SM', 'San Marino'),
    ('ST', 'Sao Tome'),
    ('SA', 'Saudi Arabia'),
    ('SN', 'Senegal'),
    ('SC', 'Seychelles'),
    ('SL', 'Sierra Leone'),
    ('SG', 'Singapore'),
    ('SK', 'Slovakia'),
    ('SI', 'Slovenia'),
    ('SB', 'Solomon Islands'),
    ('SO', 'Somalia'),
    ('ZA', 'South Africa'),
    ('ES', 'Spain'),
    ('LK', 'Sri Lanka'),
    ('SH', 'St. Helena'),
    ('PM', 'St.Pierre'),
    ('SD', 'Sudan'),
    ('SR', 'Suriname'),
    ('SZ', 'Swaziland'),
    ('SE', 'Sweden'),
    ('CH', 'Switzerland'),
    ('SY', 'Syrian Arab Republic'),
    ('TW', 'Taiwan'),
    ('TJ', 'Tajikistan'),
    ('TZ', 'Tanzania'),
    ('TH', 'Thailand'),
    ('TG', 'Togo'),
    ('TK', 'Tokelau'),
    ('TO', 'Tonga'),
    ('TT', 'Trinidad And Tobago'),
    ('TN', 'Tunisia'),
    ('TR', 'Turkey'),
    ('TM', 'Turkmenistan'),
    ('TV', 'Tuvalu'),
    ('UG', 'Uganda'),
    ('UA', 'Ukraine'),
    ('AE', 'United Arab Emirates'),
    ('UK', 'United Kingdom'),
    ('US', 'United States'),
    ('UY', 'Uruguay'),
    ('UZ', 'Uzbekistan'),
    ('VU', 'Vanuatu'),
    ('VA', 'Vatican City State'),
    ('VE', 'Venezuela'),
    ('VN', 'Viet Nam'),
    ('VG', 'Virgin Islands (British)'),
    ('VI', 'Virgin Islands (U.S.)'),
    ('EH', 'Western Sahara'),
    ('YE', 'Yemen'),
    ('YU', 'Yugoslavia'),
    ('ZR', 'Zaire'),
    ('ZM', 'Zambia'),
    ('ZW', 'Zimbabwe')
]


def getLanguageDict():
    """Return the ISO-639-1 code -> language-name mapping."""
    return lang_dict


def getLanguageFromISO(iso):
    """Translate an ISO-639-1 code to a language name.

    None passes straight through; an unknown code raises KeyError
    (kept as-is for backward compatibility with existing callers).
    """
    if iso is None:
        return None
    return lang_dict[iso]

# (end of comics/utils/comicapi/comicutils.py)
# diff --git a/comics/utils/comicapi/filenameparser.py b/comics/utils/comicapi/filenameparser.py
# new file mode 100644
# index 0000000..e899692
# --- /dev/null
# +++ b/comics/utils/comicapi/filenameparser.py
# @@ -0,0 +1,283 @@
# (Apache-2.0 license header of filenameparser.py continues in the next chunk)
'''
Copyright 2012-2014 Anthony Beville
Copyright 2017 Brian Pepple

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''

import re
import os
from urllib.parse import unquote


class FileNameParser:
    """Heuristic parser that splits a comic-archive filename into series,
    issue, volume, year, issue-count and remainder parts.

    Call :meth:`parseFilename`; the results are left on the instance
    attributes ``series``, ``issue``, ``volume``, ``year``,
    ``issue_count`` and ``remainder``.
    """

    def repl(self, m):
        # Replace a regex match with same-length whitespace so character
        # indexes into the original filename stay valid.
        return ' ' * len(m.group())

    def fixSpaces(self, string, remove_dashes=True):
        """Blank out name separators (underscores, optionally dashes)."""
        if remove_dashes:
            placeholders = ['[-_]', ' +']
        else:
            placeholders = ['[_]', ' +']
        for ph in placeholders:
            string = re.sub(ph, self.repl, string)
        return string  # .strip()

    def getIssueCount(self, filename, issue_end):
        """Find an "of NN" / "(of NN)" issue count after the issue number."""
        count = ""
        filename = filename[issue_end:]

        # replace any name separators with spaces
        tmpstr = self.fixSpaces(filename)
        found = False

        match = re.search(r'(?<=\sof\s)\d+(?=\s)', tmpstr, re.IGNORECASE)
        if match:
            count = match.group()
            found = True

        if not found:
            match = re.search(r'(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE)
            if match:
                count = match.group()
                found = True

        count = count.lstrip("0")

        return count

    def getIssueNumber(self, filename):
        """Returns a tuple of issue number string, and start and end indexes
        in the filename (the indexes will be used to split the string up for
        further parsing).
        """
        # BUG FIX throughout this class: regex patterns such as "\(.*?\)"
        # were plain strings containing invalid escape sequences (a
        # DeprecationWarning/SyntaxWarning today, an error in future Python
        # versions); they are now raw strings with identical match behavior.
        found = False
        issue = ''
        start = 0
        end = 0

        # first, look for multiple "--", this means it's formatted differently
        # from most:
        if "--" in filename:
            # the pattern seems to be that anything to left of the first "--"
            # is the series name followed by issue
            filename = re.sub(r"--.*", self.repl, filename)

        elif "__" in filename:
            # the pattern seems to be that anything to left of the first "__"
            # is the series name followed by issue
            filename = re.sub(r"__.*", self.repl, filename)

        filename = filename.replace("+", " ")

        # replace parenthetical phrases with spaces
        filename = re.sub(r"\(.*?\)", self.repl, filename)
        filename = re.sub(r"\[.*?\]", self.repl, filename)

        # replace any name separators with spaces
        filename = self.fixSpaces(filename)

        # remove any "of NN" phrase with spaces (problem: this could break on
        # some titles)
        filename = re.sub(r"of [\d]+", self.repl, filename)

        # we should now have a cleaned up filename version with all the words
        # in the same positions as original filename

        # make a list of each word and its position
        word_list = list()
        for m in re.finditer(r"\S+", filename):
            word_list.append((m.group(0), m.start(), m.end()))

        # remove the first word, since it can't be the issue number
        if len(word_list) > 1:
            word_list = word_list[1:]
        else:
            # only one word?? just bail.
            return issue, start, end

        # Now try to search for the likely issue number word in the list

        # first look for a word with "#" followed by digits with optional
        # suffix - this is almost certainly the issue number
        for w in reversed(word_list):
            if re.match(r"#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
                found = True
                break

        # same as above but w/o a '#', and only look at the last word in the
        # list
        if not found:
            w = word_list[-1]
            if re.match(r"[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
                found = True

        # now try to look for a # followed by any characters
        if not found:
            for w in reversed(word_list):
                if re.match(r"#\S+", w[0]):
                    found = True
                    break

        if found:
            issue = w[0]
            start = w[1]
            end = w[2]
            if issue[0] == '#':
                issue = issue[1:]

        return issue, start, end

    def getSeriesName(self, filename, issue_start):
        """Use the issue number string index to split the filename string."""
        if issue_start != 0:
            filename = filename[:issue_start]

        # in case there is no issue number, remove some obvious stuff
        if "--" in filename:
            # the pattern seems to be that anything to left of the first "--"
            # is the series name followed by issue
            filename = re.sub(r"--.*", self.repl, filename)

        elif "__" in filename:
            # the pattern seems to be that anything to left of the first "__"
            # is the series name followed by issue
            filename = re.sub(r"__.*", self.repl, filename)

        filename = filename.replace("+", " ")
        tmpstr = self.fixSpaces(filename, remove_dashes=False)

        series = tmpstr
        volume = ""

        # save the last word
        try:
            last_word = series.split()[-1]
        except IndexError:
            # BUG FIX: was a bare ``except:`` - only an empty split can fail
            # here, so catch exactly that instead of swallowing everything.
            last_word = ""

        # remove any parenthetical phrases
        series = re.sub(r"\(.*?\)", "", series)

        # search for volume number
        match = re.search(r'(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series)
        if match:
            series = match.group(1)
            volume = match.group(3)

        # if a volume wasn't found, see if the last word is a year in
        # parentheses, since that's a common way to designate the volume
        if volume == "":
            # match either (YEAR), (YEAR-), or (YEAR-YEAR2)
            match = re.search(r"(\()(\d{4})(-(\d{4}|)|)(\))", last_word)
            if match:
                volume = match.group(2)

        series = series.strip()

        # if we don't have an issue number (issue_start==0), look
        # for hints i.e. "TPB", "one-shot", "OS", "OGN", etc that might
        # be removed to help search online
        if issue_start == 0:
            one_shot_words = ["tpb", "os", "one-shot", "ogn", "gn"]
            try:
                last_word = series.split()[-1]
                if last_word.lower() in one_shot_words:
                    series = series.rsplit(' ', 1)[0]
            except IndexError:
                # BUG FIX: bare ``except:`` narrowed (empty split only).
                pass

        return series, volume.strip()

    def getYear(self, filename, issue_end):
        """Find a four-digit year after the issue number ("" if none)."""
        filename = filename[issue_end:]

        year = ""
        # look for four digit number with "(" ")" or "--" around it
        match = re.search(r'(\(\d\d\d\d\))|(--\d\d\d\d--)', filename)
        if match:
            year = match.group()
        # remove non-digits
        year = re.sub(r"[^0-9]", "", year)
        return year

    def getRemainder(self, filename, year, count, volume, issue_end):
        """Make a guess at where the the non-interesting stuff begins."""
        remainder = ""

        if "--" in filename:
            remainder = filename.split("--", 1)[1]
        elif "__" in filename:
            remainder = filename.split("__", 1)[1]
        elif issue_end != 0:
            remainder = filename[issue_end:]

        remainder = self.fixSpaces(remainder, remove_dashes=False)
        if volume != "":
            remainder = remainder.replace("Vol." + volume, "", 1)
        if year != "":
            remainder = remainder.replace(year, "", 1)
        if count != "":
            remainder = remainder.replace("of " + count, "", 1)

        remainder = remainder.replace("()", "")
        # BUG FIX(probable): the original replaced a single space with a
        # single space (a no-op); collapsing double spaces matches the
        # "cleans some whitespace mess" comment and the upstream code.
        remainder = remainder.replace("  ", " ")

        return remainder.strip()

    def parseFilename(self, filename):
        """Parse *filename* and populate the instance attributes."""
        # remove the path
        filename = os.path.basename(filename)

        # remove the extension
        filename = os.path.splitext(filename)[0]

        # url decode, just in case
        filename = unquote(filename)

        # sometimes archives get messed up names from too many decodes
        # often url encodings will break and leave "_28" and "_29" in place
        # of "(" and ")" see if there are a number of these, and replace them
        if filename.count("_28") > 1 and filename.count("_29") > 1:
            filename = filename.replace("_28", "(")
            filename = filename.replace("_29", ")")

        self.issue, issue_start, issue_end = self.getIssueNumber(filename)
        self.series, self.volume = self.getSeriesName(filename, issue_start)

        # provides proper value when the filename doesn't have a issue number
        if issue_end == 0:
            issue_end = len(self.series)

        self.year = self.getYear(filename, issue_end)
        self.issue_count = self.getIssueCount(filename, issue_end)
        self.remainder = self.getRemainder(
            filename,
            self.year,
            self.issue_count,
            self.volume,
            issue_end)

        if self.issue != "":
            # strip off leading zeros
            self.issue = self.issue.lstrip("0")
            if self.issue == "":
                self.issue = "0"
            if self.issue[0] == ".":
                self.issue = "0" + self.issue

# (end of comics/utils/comicapi/filenameparser.py)
# diff --git a/comics/utils/comicapi/genericmetadata.py b/comics/utils/comicapi/genericmetadata.py
# new file mode 100644
# index 0000000..eecdcc5
# --- /dev/null
# +++ b/comics/utils/comicapi/genericmetadata.py
# @@ -0,0 +1,294 @@
# (Apache-2.0 license header of genericmetadata.py continues in the next chunk)
'''
Copyright 2012-2014 Anthony Beville
Copyright 2017 Brian Pepple

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''

from . import comicutils as utils


class PageType(object):
    '''
    These page info classes are exactly the same as the CIX scheme, since
    it's unique
    '''
    FrontCover = "FrontCover"
    InnerCover = "InnerCover"
    RoundUp = "RoundUp"
    Story = "Story"
    Advertisement = "Advertisement"
    Editorial = "Editorial"
    Letters = "Letters"
    Preview = "Preview"
    BackCover = "BackCover"
    Other = "Other"
    Deleted = "Deleted"


class GenericMetadata(object):
    """Format-neutral container for comic metadata.

    All fields default to None (or an empty list) and ``isEmpty`` stays
    True until some real data is set.
    """

    def __init__(self):

        self.isEmpty = True
        self.TagOrigin = None

        self.series = None
        self.issue = None
        self.title = None
        self.publisher = None
        self.month = None
        self.year = None
        self.day = None
        self.issueCount = None
        self.volume = None
        self.genre = None
        self.language = None  # 2 letter iso code
        self.comments = None  # Use as same way as Summary in CIX

        self.volumeCount = None
        self.criticalRating = None
        self.country = None

        self.alternateSeries = None
        self.alternateNumber = None
        self.alternateCount = None
        self.imprint = None
        self.notes = None
        self.webLink = None
        self.format = None
        self.manga = None
        self.blackAndWhite = None
        self.pageCount = None
        self.maturityRating = None

        self.storyArc = None
        self.seriesGroup = None
        self.scanInfo = None

        self.characters = None
        self.teams = None
        self.locations = None

        self.credits = list()
        self.tags = list()
        self.pages = list()

        # Some CoMet-only items
        self.price = None
        self.isVersionOf = None
        self.rights = None
        self.identifier = None
        self.lastMark = None
        self.coverImage = None

    def overlay(self, new_md):
        '''
        Overlay a metadata object on this one.

        That is, when the new object has non-None values, over-write them
        to this one.
        '''

        def assign(curr, new_value):
            # An explicit empty string clears the field; None means
            # "leave the existing value alone".
            if new_value is not None:
                if isinstance(new_value, str) and len(new_value) == 0:
                    setattr(self, curr, None)
                else:
                    setattr(self, curr, new_value)

        if not new_md.isEmpty:
            self.isEmpty = False

        assign("series", new_md.series)
        assign("issue", new_md.issue)
        assign("issueCount", new_md.issueCount)
        assign("title", new_md.title)
        assign("publisher", new_md.publisher)
        assign("day", new_md.day)
        assign("month", new_md.month)
        assign("year", new_md.year)
        assign("volume", new_md.volume)
        assign("volumeCount", new_md.volumeCount)
        assign("genre", new_md.genre)
        assign("language", new_md.language)
        assign("country", new_md.country)
        assign("criticalRating", new_md.criticalRating)
        assign("alternateSeries", new_md.alternateSeries)
        assign("alternateNumber", new_md.alternateNumber)
        assign("alternateCount", new_md.alternateCount)
        assign("imprint", new_md.imprint)
        assign("webLink", new_md.webLink)
        assign("format", new_md.format)
        assign("manga", new_md.manga)
        assign("blackAndWhite", new_md.blackAndWhite)
        assign("maturityRating", new_md.maturityRating)
        assign("storyArc", new_md.storyArc)
        assign("seriesGroup", new_md.seriesGroup)
        assign("scanInfo", new_md.scanInfo)
        assign("characters", new_md.characters)
        assign("teams", new_md.teams)
        assign("locations", new_md.locations)
        assign("comments", new_md.comments)
        assign("notes", new_md.notes)

        assign("price", new_md.price)
        assign("isVersionOf", new_md.isVersionOf)
        assign("rights", new_md.rights)
        assign("identifier", new_md.identifier)
        assign("lastMark", new_md.lastMark)

        self.overlayCredits(new_md.credits)

        if len(new_md.tags) > 0:
            assign("tags", new_md.tags)

        if len(new_md.pages) > 0:
            assign("pages", new_md.pages)

    def overlayCredits(self, new_credits):
        """Merge *new_credits* into this object's credit list."""
        for c in new_credits:
            if 'primary' in c and c['primary']:
                primary = True
            else:
                primary = False

            # Remove credit role if person is blank
            if c['person'] == "":
                for r in reversed(self.credits):
                    if r['role'].lower() == c['role'].lower():
                        self.credits.remove(r)
            # otherwise, add it!
            else:
                self.addCredit(c['person'], c['role'], primary)

    def setDefaultPageList(self, page_count):
        # generate a default page list, with the first page marked as the
        # cover
        for i in range(page_count):
            page_dict = dict()
            page_dict['Image'] = str(i)
            if i == 0:
                page_dict['Type'] = PageType.FrontCover
            self.pages.append(page_dict)

    def getArchivePageIndex(self, pagenum):
        # convert the displayed page number to the page index of the file in
        # the archive
        if pagenum < len(self.pages):
            return int(self.pages[pagenum]['Image'])
        else:
            return 0

    def getCoverPageIndexList(self):
        # return a list of archive page indices of cover pages
        coverlist = []
        for p in self.pages:
            if 'Type' in p and p['Type'] == PageType.FrontCover:
                coverlist.append(int(p['Image']))

        if len(coverlist) == 0:
            coverlist.append(0)

        return coverlist

    def addCredit(self, person, role, primary=False):
        """Add a (person, role) credit, de-duplicating case-insensitively."""
        credit = dict()
        credit['person'] = person
        credit['role'] = role
        if primary:
            credit['primary'] = primary

        # look to see if it's not already there...
        found = False
        for c in self.credits:
            if (c['person'].lower() == person.lower() and
                    c['role'].lower() == role.lower()):
                # no need to add it. just adjust the "primary" flag as needed
                c['primary'] = primary
                found = True
                break

        if not found:
            self.credits.append(credit)

    def __str__(self, *args, **kwargs):
        vals = []
        if self.isEmpty:
            return "No metadata"

        def add_string(tag, val):
            if val is not None and u"{0}".format(val) != "":
                # BUG FIX: was ``vals.append(tag, val)`` - list.append takes
                # exactly one argument, so this raised TypeError for any
                # non-empty metadata.  Append a (tag, value) tuple, which is
                # what the formatting loop below expects.
                vals.append((tag, val))

        def add_attr_string(tag):
            val = getattr(self, tag)
            add_string(tag, val)

        add_attr_string("series")
        add_attr_string("issue")
        add_attr_string("issueCount")
        add_attr_string("title")
        add_attr_string("publisher")
        add_attr_string("year")
        add_attr_string("month")
        add_attr_string("day")
        add_attr_string("volume")
        add_attr_string("volumeCount")
        add_attr_string("genre")
        add_attr_string("language")
        add_attr_string("country")
        add_attr_string("criticalRating")
        add_attr_string("alternateSeries")
        add_attr_string("alternateNumber")
        add_attr_string("alternateCount")
        add_attr_string("imprint")
        add_attr_string("webLink")
        add_attr_string("format")
        add_attr_string("manga")

        add_attr_string("price")
        add_attr_string("isVersionOf")
        add_attr_string("rights")
        add_attr_string("identifier")
        add_attr_string("lastMark")

        if self.blackAndWhite:
            add_attr_string("blackAndWhite")
        add_attr_string("maturityRating")
        add_attr_string("storyArc")
        add_attr_string("seriesGroup")
        add_attr_string("scanInfo")
        add_attr_string("characters")
        add_attr_string("teams")
        add_attr_string("locations")
        add_attr_string("comments")
        add_attr_string("notes")

        add_string("tags", utils.listToString(self.tags))

        for c in self.credits:
            primary = ""
            if 'primary' in c and c['primary']:
                primary = " [P]"
            add_string("credit", c['role'] + ": " + c['person'] + primary)

        # find the longest field name
        flen = 0
        for i in vals:
            flen = max(flen, len(i[0]))
        flen += 1

        # format the data nicely
        outstr = ""
        fmt_str = u"{0: <" + str(flen) + "} {1}\n"
        for i in vals:
            outstr += fmt_str.format(i[0] + ":", i[1])

        return outstr

# (end of comics/utils/comicapi/genericmetadata.py)
# diff --git a/comics/utils/comicapi/issuestring.py b/comics/utils/comicapi/issuestring.py
# new file mode 100644
# index 0000000..ba35307
# --- /dev/null
# +++ b/comics/utils/comicapi/issuestring.py
# @@ -0,0 +1,116 @@
# (issuestring.py carries the same Apache-2.0 license header as above:
#  Copyright 2012-2014 Anthony Beville, Copyright 2017 Brian Pepple.)


class IssueString(object):
    """Split an issue-number string into a numeric part and a suffix.

    Assumes the numeric portion is always first; e.g. "12.5AU" becomes
    num=12.5, suffix="AU".
    """

    def __init__(self, text):
        # Break up the issue number string into 2 parts: the numeric and
        # suffix string.  (assumes that the numeric portion is always first.)
        self.num = None
        # BUG FIX: this was misspelled ``self.suffex``, so for None/empty
        # input ``self.suffix`` was never defined and asString()/asFloat()
        # raised AttributeError.
        self.suffix = ""
        if text is None:
            return

        if type(text) == int:
            text = str(text)

        if len(text) == 0:
            return

        text = str(text)

        # skip the minus sign if it's first
        if text[0] == "-":
            start = 1
        else:
            start = 0

        # If it's still not numeric at start skip it.
        if text[start].isdigit() or text[start] == ".":
            # Walk thru the string, look for split point (the first
            # non-numeric).
            decimal_count = 0
            for idx in range(start, len(text)):
                if text[idx] not in "0123456789.":
                    break
                # special case: also split on second "."
                if text[idx] == ".":
                    decimal_count += 1
                    if decimal_count > 1:
                        break
            else:
                idx = len(text)

            # move trailing numeric decimal to suffix
            # (only if there is other junk after )
            if text[idx - 1] == "." and len(text) != idx:
                idx = idx - 1

            # if there is no numeric after the minus, make the minus part of
            # the suffix
            if idx == 1 and start == 1:
                idx = 0

            part1 = text[0:idx]
            part2 = text[idx:len(text)]

            if part1 != "":
                self.num = float(part1)
            self.suffix = part2
        else:
            self.suffix = text

    def asString(self, pad=0):
        """Return the number, left zero-padded to *pad* digits, with the
        suffix attached."""
        if self.num is None:
            return self.suffix

        negative = self.num < 0

        num_f = abs(self.num)

        num_int = int(num_f)
        num_s = str(num_int)
        if float(num_int) != num_f:
            num_s = str(num_f)

        num_s += self.suffix

        # create padding (pad counts only the integer digits)
        padding = ""
        length = len(str(num_int))  # renamed from ``l`` (shadowed nothing useful, ambiguous name)
        if length < pad:
            padding = "0" * (pad - length)

        num_s = padding + num_s
        if negative:
            num_s = "-" + num_s

        return num_s

    def asFloat(self):
        # return the float, with no suffix; a lone "half" glyph counts as .5
        if self.suffix == u"½":
            if self.num is not None:
                return self.num + .5
            else:
                return .5
        return self.num

    def asInt(self):
        # return the int version of the float
        if self.num is None:
            return None
        return int(self.num)

# (end of comics/utils/comicapi/issuestring.py)
# diff --git a/comics/utils/comicimporter.py b/comics/utils/comicimporter.py
# index 09680cd..82cae57 100644
# --- a/comics/utils/comicimporter.py
# +++ b/comics/utils/comicimporter.py
# @@ -11,6 +11,8 @@
#  from .comicfilehandler import ComicFileHandler
#  from . import fnameparser, utils
# +from .comicapi.issuestring import IssueString
# +
#  from fuzzywuzzy import fuzz
# @@ -373,9 +375,11 @@ def _reprocess_issue_without_cvid(self, issue_id):
#      issue_cover = cfh.extract_cover(issue.file)
#      issue.page_count = cfh.get_page_count(issue.file)
# +    num = issue_number if issue_number else 1
# +
# +    # 2.
Update Issue information: Issue.objects.filter(id=issue_id).update( - number=issue_number if issue_number else 1, + number=IssueString(num).asString(pad=3), date=issue_year + '-01-01' if issue_year else datetime.date.today(), cover=issue_cover, ) @@ -718,7 +722,7 @@ def _create_issue(self, file, api_url, series_id): cvurl=data['cvurl'], name=data['name'], desc=data['desc'], - number=data['number'], + number=IssueString(data['number']).asString(pad=3), date=data['date'], file=file, series=series, @@ -964,7 +968,7 @@ def _update_issue(self, obj_id, api_url, series_id): cvurl=data['cvurl'], name=data['name'], desc=data['desc'], - number=data['number'], + number=IssueString(data['number']).asString(pad=3), date=data['date'], series=series, cover=data['image'], @@ -1082,7 +1086,7 @@ def _reset_issue(self, obj_id): issue.cvurl = '' issue.name = '' - issue.number = 1 + issue.number = IssueString('1').asString(pad=3), issue.date = datetime.date.today() issue.desc = '' issue.arcs.clear()