🗃️ Data extraction on 31-4

2022-10-16 10:52:02 +08:00 · 2022-10-16 10:52:02 +08:00 · 871984e9e7
commit 871984e9e7
parent 958de34dfa
7 changed files with 335 additions and 0 deletions
--- a/data/patch/31-4/outfits.py
+++ b/data/patch/31-4/outfits.py
@ -0,0 +1,34 @@
+import scrapy
+import sys
+import requests
+import io
+
+sys.path.append("../../convert/31-2")  # resolved against the CWD; presumably where db.py lives -- verify
+from db import *  # presumably the source of `db`, `db_session` and `Outfit` used in this file
+
+db.bind(provider="sqlite", filename="../../clean/31-2/data.sqlite3")  # NOTE(review): this file is under patch/31-4 but both paths say 31-2 -- confirm
+db.generate_mapping()
+
+
+class OutfitSpider(scrapy.Spider):
+    name = "outfits"
+    start_urls = [
+        "https://deadcells.fandom.com/wiki/Outfits",
+    ]
+
+    def parse(self, response):
+        for quote in response.css(".wikitable tbody tr"):
+            name_en = quote.css("td:nth-child(2) > span:last-child::text").get()
+            if not name_en:
+                print(name_en)
+                continue
+            preview = quote.css(
+                "td:nth-child(6) > span:last-child a::attr('href')"
+            ).get()
+            try:
+                r = requests.get(preview)
+                with db_session:
+                    w = Outfit.select(name_en=name_en).first()
+                    w.preview = io.BytesIO(r.content).getbuffer().tobytes()
+            except:
+                print("Preview not available.")