识别基报数据

This commit is contained in:
zhbaor 2023-08-14 09:24:03 +08:00
parent 94eca0ebeb
commit 7643cf2dae
3 changed files with 86 additions and 77 deletions

View file

@ -1,11 +1,14 @@
from pepperbot.core.message.chain import MessageChain, Image, Text from pepperbot.core.message.chain import MessageChain, Image, Text
from paddleocr import PaddleOCR from rapidocr_onnxruntime import RapidOCR
import os import os
import asyncio import asyncio
import functools import functools
import re
ocr = PaddleOCR(use_angle_cls=False, use_gpu=False) rapid_ocr = None
if not rapid_ocr:
rapid_ocr = RapidOCR("rapid_ocr.yml")
class RIICReportAnalysis: class RIICReportAnalysis:
@ -14,14 +17,50 @@ class RIICReportAnalysis:
return return
img_seg: Image = chain[0] img_seg: Image = chain[0]
img_path = await img_seg.download() img_path = await img_seg.download()
print(f"Image saved to {img_path}")
loop = asyncio.get_running_loop() loop = asyncio.get_running_loop()
ocr_text = await loop.run_in_executor( result, elapse = await loop.run_in_executor(
None, functools.partial(ocr.ocr, img_path, cls=False) None, functools.partial(rapid_ocr, img_path)
) )
print(ocr_text) if not "副手简报" in [i[1] for i in result]:
return
# Parse the OCR `result` into per-category lists and reply with a text summary.
# Each entry `i` is read as: i[1] = recognized text, i[0][0] = a coordinate pair
# (presumably [x, y] of the text box's first corner — TODO confirm against the
# RapidOCR output format).
# NOTE(review): the early `return` just above appears to skip the
# os.remove(img_path) cleanup further down — confirm the image is not leaked.
lmb_height = 0
lmb_numers = []
date_list = []
exp_list = []
gold_list = []
# Locate the "龙门币" label and remember its second coordinate (i[0][0][1]).
# NOTE(review): if the label is never found, lmb_height stays 0 and the band
# check below matches anything within 20 units of coordinate 0.
for i in result:
if i[1] == "龙门币":
lmb_height = i[0][0][1]
break
# Classify every recognized text. Each list stores [first coordinate, captured
# digits]; the captured value is kept as a string.
for i in result:
# Text containing a decimal number (digits.digits) goes to date_list —
# presumably a date label; verify against real screenshots.
if m := re.search(r"([0-9]+\.[0-9]+)", i[1]):
date_list.append([int(i[0][0][0]), m.group(1)])
continue
# "EXP<digits>的作战记录" -> experience amount.
if m := re.search(r"EXP([0-9]+)的作战记录", i[1]):
exp_list.append([int(i[0][0][0]), m.group(1)])
continue
# "<digits>的贵金属" -> gold amount.
if m := re.search(r"([0-9]+)的贵金属", i[1]):
gold_list.append([int(i[0][0][0]), m.group(1)])
continue
# Anything not matched above whose second coordinate lies strictly within
# 20 units of the "龙门币" label contributes its first run of digits.
if lmb_height - 20 < i[0][0][1] < lmb_height + 20:
if m := re.search(r"([0-9]+)", i[1]):
lmb_numers.append([int(i[0][0][0]), m.group(1)])
# Order every list by the stored first coordinate so entries at the same index
# line up (presumably left-to-right columns — TODO confirm).
lmb_numers.sort(key=lambda x: x[0])
gold_list.sort(key=lambda x: x[0])
exp_list.sort(key=lambda x: x[0])
date_list.sort(key=lambda x: x[0])
output = ""
# One section per date: two consecutive lmb numbers (indices 2*i and 2*i+1),
# one gold entry and one exp entry.
# NOTE(review): indexing is unguarded — raises IndexError when OCR recognized
# fewer than 2 lmb numbers, 1 gold entry or 1 exp entry per date.
for i in range(len(date_list)):
output += f"{date_list[i][1]}\n"
output += f"💵 {lmb_numers[2 * i][1]}订单({lmb_numers[2 * i + 1][1]})\n"
output += f"🧈 {gold_list[i][1]}赤金\n"
output += f"📼 {exp_list[i][1]}经验\n\n"
# Send the summary back, without the trailing blank lines.
await chain.onebot_reply(Text(output.strip()))
os.remove(img_path) os.remove(img_path)
print(f"file removed.")

40
rapid_ocr.yml Normal file
View file

@ -0,0 +1,40 @@
# Configuration file passed by name to RapidOCR("rapid_ocr.yml").
# NOTE(review): nesting indentation is not visible in this view; the keys that
# follow each section header (Global / Det / Cls / Rec) presumably belong to
# that section — confirm against the real file.
Global:
text_score: 0.5
use_angle_cls: false
use_text_det: true
print_verbose: false
min_height: 30
width_height_ratio: 8
# Det section — the model file name suggests the text-detection model; CUDA is disabled.
Det:
use_cuda: false
model_path: models/ch_PP-OCRv3_det_infer.onnx
limit_side_len: 736
limit_type: min
thresh: 0.3
box_thresh: 0.5
max_candidates: 1000
unclip_ratio: 1.6
use_dilation: false
score_mode: fast
# Cls section — the model file name suggests the orientation classifier; CUDA is disabled.
# NOTE(review): use_angle_cls is false above, so this section is presumably unused — confirm.
Cls:
use_cuda: false
model_path: models/ch_ppocr_mobile_v2.0_cls_infer.onnx
cls_image_shape: [3, 48, 192]
cls_batch_num: 6
cls_thresh: 0.9
label_list: ['0', '180']
# Rec section — the model file name suggests the text-recognition model; CUDA is disabled.
Rec:
use_cuda: false
model_path: models/ch_PP-OCRv3_rec_infer.onnx
rec_img_shape: [3, 48, 320]
rec_batch_num: 6

View file

@ -3,35 +3,14 @@ aiosqlite==0.19.0
anyio==3.7.1 anyio==3.7.1
APScheduler==3.10.2 APScheduler==3.10.2
arrow==1.2.3 arrow==1.2.3
astor==0.8.1
asttokens==2.2.1 asttokens==2.2.1
attrdict==2.0.1
Babel==2.12.1
backports.zoneinfo==0.2.1 backports.zoneinfo==0.2.1
bce-python-sdk==0.8.87
beautifulsoup4==4.12.2
better-exceptions==0.3.3 better-exceptions==0.3.3
blinker==1.6.2
cachetools==5.3.1
certifi==2023.7.22 certifi==2023.7.22
charset-normalizer==3.2.0
click==8.1.6
contourpy==1.1.0
cssselect==1.2.0
cssutils==2.7.1
cycler==0.11.0
Cython==3.0.0
databases==0.6.2 databases==0.6.2
decorator==5.1.1
devtools==0.11.0 devtools==0.11.0
et-xmlfile==1.1.0
exceptiongroup==1.1.2 exceptiongroup==1.1.2
executing==1.2.0 executing==1.2.0
fire==0.5.0
Flask==2.3.2
flask-babel==3.1.0
fonttools==4.42.0
future==0.18.3
greenlet==2.0.2 greenlet==2.0.2
h11==0.14.0 h11==0.14.0
html5tagger==1.3.0 html5tagger==1.3.0
@ -39,80 +18,31 @@ httpcore==0.17.3
httptools==0.6.0 httptools==0.6.0
httpx==0.24.1 httpx==0.24.1
idna==3.4 idna==3.4
imageio==2.31.1
imgaug==0.4.0
importlib-metadata==6.8.0
importlib-resources==6.0.1
itsdangerous==2.1.2
Jinja2==3.1.2
kiwisolver==1.4.4
lazy_loader==0.3
lmdb==1.4.1
loguru==0.7.0 loguru==0.7.0
lxml==4.9.3
markdown-it-py==3.0.0 markdown-it-py==3.0.0
MarkupSafe==2.1.3
matplotlib==3.7.2
mdurl==0.1.2 mdurl==0.1.2
multidict==6.0.4 multidict==6.0.4
networkx==3.1
numpy==1.24.4
opencv-contrib-python==4.6.0.66
opencv-python==4.6.0.66
openpyxl==3.1.2
opt-einsum==3.3.0
orjson==3.9.4 orjson==3.9.4
ormar==0.12.2 ormar==0.12.2
packaging==23.1
paddle-bfloat==0.1.7
paddleocr==2.7.0.2
paddlepaddle==2.5.1
pandas==2.0.3
pdf2docx==0.5.6
pepperbot==0.3.6 pepperbot==0.3.6
Pillow==10.0.0
premailer==3.10.0
protobuf==4.24.0
psutil==5.9.5
pyaes==1.6.1 pyaes==1.6.1
pyclipper==1.3.0.post4
pycryptodome==3.18.0
pydantic==1.10.8 pydantic==1.10.8
Pygments==2.16.1 Pygments==2.16.1
PyMuPDF==1.20.2
pyparsing==3.0.9
Pyrogram==2.0.106 Pyrogram==2.0.106
PySocks==1.7.1 PySocks==1.7.1
python-dateutil==2.8.2 python-dateutil==2.8.2
python-docx==0.8.11
python-dotenv==1.0.0 python-dotenv==1.0.0
pytz==2023.3 pytz==2023.3
PyWavelets==1.4.1
rapidfuzz==3.2.0
rarfile==4.0
requests==2.31.0
rich==13.5.2 rich==13.5.2
sanic==23.6.0 sanic==23.6.0
sanic-routing==23.6.0 sanic-routing==23.6.0
scikit-image==0.21.0
scipy==1.10.1
shapely==2.0.1
six==1.16.0 six==1.16.0
sniffio==1.3.0 sniffio==1.3.0
soupsieve==2.4.1
SQLAlchemy==1.4.41 SQLAlchemy==1.4.41
termcolor==2.3.0
TgCrypto==1.2.5 TgCrypto==1.2.5
tifffile==2023.7.10
tqdm==4.66.1
tracerite==1.1.0 tracerite==1.1.0
typing_extensions==4.7.1 typing_extensions==4.7.1
tzdata==2023.3
tzlocal==5.0.1 tzlocal==5.0.1
ujson==5.8.0 ujson==5.8.0
urllib3==2.0.4
uvloop==0.17.0 uvloop==0.17.0
visualdl==2.5.3
websockets==11.0.3 websockets==11.0.3
Werkzeug==2.3.6
zipp==3.16.2