🗃️ Data extraction on 31-4

This commit is contained in:
zhbaor 2022-10-16 10:52:02 +08:00
parent 958de34dfa
commit 871984e9e7
7 changed files with 335 additions and 0 deletions

View file

@@ -0,0 +1,34 @@
import scrapy
import sys
import requests
import io
# Add the directory that holds the `db` module to the import search path so
# the star-import below can find it.
# NOTE(review): both relative paths here are resolved against the current
# working directory, so the script presumably has to be launched from this
# file's directory — confirm.
sys.path.append("../../convert/31-2")
# presumably supplies `db`, `Outfit` and `db_session` (none of them are
# defined in this file) — verify against db.py
from db import *
# Point the database object at the SQLite file, then build the mapping.
# NOTE(review): looks like Pony ORM, where generate_mapping() must run after
# bind() and before any entity is queried — confirm.
db.bind(provider="sqlite", filename="../../clean/31-2/data.sqlite3")
db.generate_mapping()
class OutfitSpider(scrapy.Spider):
    """Download outfit preview images from the wiki table into the database.

    For every row of the ``.wikitable`` on the start page that yields an
    outfit name, the file linked from the sixth column is downloaded and its
    raw bytes are assigned to the ``preview`` attribute of the ``Outfit``
    record whose ``name_en`` matches.
    """

    name = "outfits"
    start_urls = [
        "https://deadcells.fandom.com/wiki/Outfits",
    ]

    # Seconds to wait on the image host before giving up on a single preview;
    # without a timeout one stalled connection would hang the whole crawl.
    PREVIEW_TIMEOUT = 30

    def parse(self, response):
        """Store a preview image for each outfit row found in ``response``.

        Processing is best-effort per row: a row with no name, no preview
        link, a failed download, or no matching ``Outfit`` record is reported
        and skipped without aborting the remaining rows.

        Args:
            response: The Scrapy response for the outfits page.
        """
        for row in response.css(".wikitable tbody tr"):
            name_en = row.css("td:nth-child(2) > span:last-child::text").get()
            if not name_en:
                # No usable name in the second cell (presumably a header
                # row); nothing to look up in the database.
                print(name_en)
                continue

            preview = row.css(
                "td:nth-child(6) > span:last-child a::attr('href')"
            ).get()
            if not preview:
                self._preview_unavailable(name_en, "row has no preview link")
                continue

            try:
                # urljoin leaves absolute links untouched and resolves
                # relative ones against the page URL.
                r = requests.get(
                    response.urljoin(preview), timeout=self.PREVIEW_TIMEOUT
                )
                # Refuse to store an HTTP error page as if it were the image.
                r.raise_for_status()
            except requests.RequestException as exc:
                self._preview_unavailable(name_en, f"download failed: {exc}")
                continue

            try:
                with db_session:
                    w = Outfit.select(name_en=name_en).first()
                    if w is None:
                        self._preview_unavailable(
                            name_en, "no matching Outfit record"
                        )
                    else:
                        # r.content is already the response body as bytes.
                        w.preview = r.content
            except Exception:
                # Keep the crawl best-effort, but record the traceback
                # instead of hiding a database failure.
                self.logger.exception(
                    "Could not store preview for %r", name_en
                )
                print("Preview not available.")

    def _preview_unavailable(self, name_en, reason):
        """Report that no preview was stored for ``name_en`` and why."""
        print("Preview not available.")
        self.logger.warning("No preview stored for %r: %s", name_en, reason)