Add Generation IX Shape Data #1122

KyroChi · 2024-08-18T05:46:15Z

I have added the generation IX shape data (I think).

I have never contributed to PokéAPI before, so please tell me if anything I am doing here is stupid or out of line.

PokéAPI's shape naming differs from Bulbapedia's, so please double-check that I got the conversion correct.

Here is the automated script I used to scrape the data:

import csv
import re
import requests

from bs4 import BeautifulSoup
from pages import load_homepage
from tqdm.auto import tqdm

# Scrape results: 1-based position within the scraped slice -> Bulbapedia body
# image number (the two digits in "BodyNN.png").
data_out = {}

BASE_URL = 'https://bulbapedia.bulbagarden.net'
LANDING = '/wiki/List_of_Pok%C3%A9mon_by_National_Pok%C3%A9dex_number'

# Compiled once, outside the loop. The dot is escaped so only a literal ".png"
# matches, and the capture group holds the two-digit body number.
pattern = re.compile(r'File:Body(\d{2})\.png')

pokedata = load_homepage(BASE_URL + LANDING)

# Skip the first 905 entries so that only the later species are scraped.
# NOTE(review): this assumes pokedata holds exactly one entry per National Dex
# number, in order - confirm against load_homepage's output.
# tqdm wraps the list (not the enumerate object) so the bar knows its total.
for ii, pokemon in enumerate(pb := tqdm(pokedata[905:])):
    pb.set_postfix_str(f"Processing {pokemon['name']}")

    url = BASE_URL + pokemon['link']
    # A timeout keeps one stalled connection from hanging the whole scrape.
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        raise Exception(f"Failed to get data from {url}, status code: {response.status_code}")

    soup = BeautifulSoup(response.text, 'html.parser')
    matches = soup.find_all('a', href=pattern)

    if not matches:
        # Same exception type an empty result produced before, now with context.
        raise IndexError(f"No body shape image link found at {url}")

    # More than one link can match; the first one is used.
    body_type = pattern.search(matches[0]['href']).group(1)

    data_out[ii + 1] = int(body_type)
    

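# Optional on-disk cache of the scrape results (uncommenting these requires `import json`).
# Note that a JSON round trip turns the integer keys of data_out into strings.
# json.dump(data_out, open('/home/kyle/projects/pokemon_data/src/scrape/gen_ix_shapes.json', 'w'))
# data_out = json.load(open('/home/kyle/projects/pokemon_data/src/scrape/gen_ix_shapes.json'))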

# Species table that is rewritten in place; the path is machine-specific.
csv_file = '/home/kyle/code/pokeapi/pokeapi/data/v2/csv/pokemon_species.csv'

# Bulbapedia body image number -> PokeAPI shape_id.
# NOTE(review): the pairs are reproduced unchanged; they have not been
# re-verified against either site here.
bulbapedia_to_pokeapi = {
    1: 1,
    2: 5,
    3: 2,
    4: 6,
    5: 8,
    6: 10,
    7: 7,
    8: 4,
    9: 9,
    10: 13,
    11: 3,
    12: 12,
    13: 14,
    14: 11,
}

# data_out has int keys when it comes straight from the scrape loop and str
# keys when it was reloaded from JSON. Normalizing to int makes the lookup
# below work in both cases.
shape_by_offset = {int(key): int(value) for key, value in data_out.items()}

# newline='' is what the csv module expects for files it reads and writes.
with open(csv_file, 'r', newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    fieldnames = reader.fieldnames
    species_data = list(reader)

if not fieldnames:
    raise ValueError(f"{csv_file} has no header row")

for row in species_data:
    species_id = int(row['id'])
    if species_id < 906:
        # Only ids 906 and above are updated.
        continue
    # Scrape keys are 1-based offsets from id 905.
    row['shape_id'] = str(bulbapedia_to_pokeapi[shape_by_offset[species_id - 905]])

with open(csv_file, 'w', newline='', encoding='utf-8') as f:
    # DictWriter handles quoting; '\n' keeps the same line ending the manual
    # join-based writer produced.
    writer = csv.DictWriter(f, fieldnames=fieldnames, lineterminator='\n')
    writer.writeheader()
    writer.writerows(species_data)

Here is the referenced pages.py file:

import re

import requests
from bs4 import BeautifulSoup
from helpers import get_generation

# Names of the eighteen Pokémon types, capitalized.
POKETYPES = [
    "Normal", "Fire", "Water", "Electric", "Grass", "Ice",
    "Fighting", "Poison", "Ground", "Flying", "Psychic", "Bug",
    "Rock", "Ghost", "Dragon", "Dark", "Steel", "Fairy",
]

def load_homepage(url):
    """Scrape the Pokémon list page at *url* into a list of dicts.

    Args:
        url: Address of the list page to download.

    Returns:
        A list with one dict per parsed table row, holding the keys
        ``"id"``, ``"name"``, ``"link"``, ``"type"`` and ``"generation"``.
        Rows that fail to parse are reported on stdout and skipped.

    Raises:
        Exception: If the page does not answer with HTTP status 200.
    """
    # A timeout keeps a stalled connection from blocking forever.
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        raise Exception(
            f"Failed to get data from {url}, status code: {response.status_code}"
        )

    soup = BeautifulSoup(response.text, "html.parser")
    pokemon_data = []

    for row in soup.find_all("tr", style="background:#FFF"):
        # Currently this cannot handle pokemon with multiple forms
        try:
            cells = row.find_all("td")
            # Only rows whose first cell carries a rowspan attribute are kept.
            if not cells[0].get("rowspan"):
                continue

            # Remove the '#' and convert to int
            id_num = int(cells[0].get_text(strip=True)[1:])
            name_link = cells[2].find("a")
            type_links = [a for cell in cells[3:] for a in cell.find_all("a")]

            pokemon_data.append(
                {
                    "id": id_num,
                    "name": name_link.get_text(strip=True),
                    "link": name_link["href"],
                    "type": [a.get_text(strip=True) for a in type_links],
                    "generation": get_generation(id_num),
                }
            )
        except Exception as e:
            # Deliberately best-effort: report the bad row once and keep going.
            print(f"Failed to parse row: {row}")
            print(e)

    return pokemon_data

def load_pokemon_page(base_url, pokemon):
    """Scrape image links and the body shape from one Pokémon's wiki page.

    Args:
        base_url: Site root that ``pokemon["link"]`` is appended to.
        pokemon: Mapping with at least the keys ``"name"`` and ``"link"``.

    Returns:
        A dict of scraped values. This function fills in ``"full_size"``
        (image URL or None), ``"gen_v_animated_sprites"`` (list of image
        URLs) and ``"shape"`` (int taken from the "BodyNN.png" link); the
        remaining keys keep their initial values.

    Raises:
        Exception: If the page does not answer with HTTP status 200.
        IndexError: If the page contains no body shape image link.
    """
    wiki_url = base_url + pokemon["link"]
    # A timeout keeps a stalled connection from blocking forever.
    response = requests.get(wiki_url, timeout=30)

    return_data = {
        "full_size": None,
        "gen_v_animated_sprites": None,
        "gen_vi_sprites": None,
        "gen_vii_sprites": None,
        "gen_viii_sprites": None,
        "pokedex_entries": [],
        "stage": None,
        "shape": None,
    }

    if response.status_code != 200:
        raise Exception(
            f"Failed to get data from {wiki_url}, status code: {response.status_code}"
        )

    soup = BeautifulSoup(response.text, "html.parser")

    # Wormadam's image link is looked up under the title "Plant Cloak"
    # instead of the Pokémon's own name.
    if pokemon["name"] == "Wormadam":
        image_title = "Plant Cloak"
    else:
        image_title = pokemon["name"].title()
    td = soup.find("a", {"class": "image", "title": image_title})
    if not td:
        print(f"Failed to find image for {pokemon['name']} at {wiki_url}")

    # TODO: Also grab the mega versions of the pokemon
    try:
        return_data["full_size"] = td.find("img")["src"]
    except (AttributeError, KeyError, TypeError):
        # td is None, the link holds no <img>, or the <img> has no src.
        print(f"Failed to find full size image for {pokemon['name']} at {wiki_url}")
        return_data["full_size"] = None

    sprite_pattern = re.compile(r"/wiki/File:Spr_5b_.*")
    files = []
    for match in soup.find_all("a", href=sprite_pattern):
        try:
            files.append(match.find("img")["src"])
        except (AttributeError, KeyError, TypeError):
            print(f'Failed to find gen_v sprite for {pokemon["name"]}')
    return_data["gen_v_animated_sprites"] = files

    # The dot is escaped so only a literal ".png" matches; the capture group
    # holds the two-digit body number.
    body_pattern = re.compile(r"File:Body(\d{2})\.png")
    matches = soup.find_all("a", href=body_pattern)
    if not matches:
        # Same exception type an empty result produced before, now with context.
        raise IndexError(
            f"Failed to find body shape image for {pokemon['name']} at {wiki_url}"
        )

    # BS can sometimes grab more than one match... I am hoping that the first is
    # always the correct one
    body_type = body_pattern.search(matches[0]["href"]).group(1)
    return_data["shape"] = int(body_type)

    return return_data

KyroChi · 2024-08-18T05:48:21Z

@Naramsim I am happy to make modifications to this PR. To be perfectly honest I am hoping my simple solution here solved the problem, but I don't necessarily have the time to dive in more deeply.

Naramsim · 2024-08-18T13:04:03Z

Hi, I can't review it now, but it seems very promising!

I'll hopefully update you in the next few days.

programgames · 2024-09-03T07:43:41Z

What is the 'pages' library that you use? I couldn't find it on the internet.

KyroChi · 2024-09-03T18:45:50Z

What is the 'pages' library that you use? I couldn't find it on the internet.

Aw, shoot. That is some code from another library that I have been working on. I have updated the original PR to include this without reference to my own module. Apologies.

pokeapi-machine-user · 2024-09-12T08:29:43Z

A PokeAPI/api-data refresh has started. In ~45 minutes the staging branch of PokeAPI/api-data will be pushed with the new generated data.

The staging branch will be deployed in our staging environment and the entire API will be ready to review.

A Pull Request (master<-staging) will also be created at PokeAPI/api-data and assigned to the PokeAPI Core team for review. If it is approved and merged, the new data will soon be available worldwide at pokeapi.co.

pokeapi-machine-user · 2024-09-12T08:56:15Z

The updater script has finished its job and has now opened a Pull Request towards PokeAPI/api-data with the updated data.

The Pull Request can be seen deployed in our staging environment once the CircleCI deploy has finished (check the start time of the last build).

add generation ix shape data

95ac774

Merge branch 'master' into master

63eadc1

Naramsim merged commit a81f123 into PokeAPI:master Sep 12, 2024
4 checks passed

pokeapi-machine-user added a commit to PokeAPI/api-data that referenced this pull request Sep 12, 2024

[Updater Bot] Regenerate data from PokeAPI/pokeapi#1122

47c3134

pokeapi-machine-user mentioned this pull request Sep 12, 2024

API data update from PokeAPI/pokeapi#1122 PokeAPI/api-data#254

Merged

Naramsim pushed a commit to PokeAPI/api-data that referenced this pull request Sep 12, 2024

API data update from PokeAPI/pokeapi#1122 (#254)

35062df

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add Generation IX Shape Data #1122

Add Generation IX Shape Data #1122

KyroChi commented Aug 18, 2024 •

edited

Loading

KyroChi commented Aug 18, 2024

Naramsim commented Aug 18, 2024

programgames commented Sep 3, 2024

KyroChi commented Sep 3, 2024

pokeapi-machine-user commented Sep 12, 2024

pokeapi-machine-user commented Sep 12, 2024

Add Generation IX Shape Data #1122

Add Generation IX Shape Data #1122

Conversation

KyroChi commented Aug 18, 2024 • edited Loading

KyroChi commented Aug 18, 2024

Naramsim commented Aug 18, 2024

programgames commented Sep 3, 2024

KyroChi commented Sep 3, 2024

pokeapi-machine-user commented Sep 12, 2024

pokeapi-machine-user commented Sep 12, 2024

KyroChi commented Aug 18, 2024 •

edited

Loading