From 7643cf2dae79f9b921fab4136618be193c08a96b Mon Sep 17 00:00:00 2001 From: Zhao Zuohong Date: Mon, 14 Aug 2023 09:24:03 +0800 Subject: [PATCH] =?UTF-8?q?=E8=AF=86=E5=88=AB=E5=9F=BA=E6=8A=A5=E6=95=B0?= =?UTF-8?q?=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/riic_report_analysis.py | 53 +++++++++++++++++++++++---- rapid_ocr.yml | 40 +++++++++++++++++++++ requirements.txt | 70 ------------------------------------ 3 files changed, 86 insertions(+), 77 deletions(-) create mode 100644 rapid_ocr.yml diff --git a/apps/riic_report_analysis.py b/apps/riic_report_analysis.py index d97bc4f..917bff4 100644 --- a/apps/riic_report_analysis.py +++ b/apps/riic_report_analysis.py @@ -1,11 +1,14 @@ from pepperbot.core.message.chain import MessageChain, Image, Text -from paddleocr import PaddleOCR +from rapidocr_onnxruntime import RapidOCR import os import asyncio import functools +import re -ocr = PaddleOCR(use_angle_cls=False, use_gpu=False) +rapid_ocr = None +if not rapid_ocr: + rapid_ocr = RapidOCR("rapid_ocr.yml") class RIICReportAnalysis: @@ -14,14 +17,50 @@ class RIICReportAnalysis: return img_seg: Image = chain[0] img_path = await img_seg.download() - print(f"Image saved to {img_path}") loop = asyncio.get_running_loop() - ocr_text = await loop.run_in_executor( - None, functools.partial(ocr.ocr, img_path, cls=False) + result, elapse = await loop.run_in_executor( + None, functools.partial(rapid_ocr, img_path) ) - print(ocr_text) + if not "副手简报" in [i[1] for i in result]: + return + lmb_height = 0 + lmb_numers = [] + date_list = [] + exp_list = [] + gold_list = [] + for i in result: + if i[1] == "龙门币": + lmb_height = i[0][0][1] + break + for i in result: + if m := re.search(r"([0-9]+\.[0-9]+)", i[1]): + date_list.append([int(i[0][0][0]), m.group(1)]) + continue + if m := re.search(r"EXP([0-9]+)的作战记录", i[1]): + exp_list.append([int(i[0][0][0]), m.group(1)]) + continue + if m := re.search(r"([0-9]+)的贵金属", i[1]): + gold_list.append([int(i[0][0][0]), m.group(1)]) + continue + if lmb_height - 20 < i[0][0][1] < lmb_height + 20: + if m := re.search(r"([0-9]+)", i[1]): + lmb_numers.append([int(i[0][0][0]), m.group(1)]) + + lmb_numers.sort(key=lambda x: x[0]) + gold_list.sort(key=lambda x: x[0]) + exp_list.sort(key=lambda x: x[0]) + date_list.sort(key=lambda x: x[0]) + + output = "" + + for i in range(len(date_list)): + output += f"【{date_list[i][1]}】\n" + output += f"💵 {lmb_numers[2 * i][1]}订单({lmb_numers[2 * i + 1][1]})\n" + output += f"🧈 {gold_list[i][1]}赤金\n" + output += f"📼 {exp_list[i][1]}经验\n\n" + + await chain.onebot_reply(Text(output.strip())) os.remove(img_path) - print(f"file removed.") diff --git a/rapid_ocr.yml b/rapid_ocr.yml new file mode 100644 index 0000000..3c5ec96 --- /dev/null +++ b/rapid_ocr.yml @@ -0,0 +1,40 @@ +Global: + text_score: 0.5 + use_angle_cls: false + use_text_det: true + print_verbose: false + min_height: 30 + width_height_ratio: 8 + +Det: + use_cuda: false + + model_path: models/ch_PP-OCRv3_det_infer.onnx + + limit_side_len: 736 + limit_type: min + + thresh: 0.3 + box_thresh: 0.5 + max_candidates: 1000 + unclip_ratio: 1.6 + use_dilation: false + score_mode: fast + +Cls: + use_cuda: false + + model_path: models/ch_ppocr_mobile_v2.0_cls_infer.onnx + + cls_image_shape: [3, 48, 192] + cls_batch_num: 6 + cls_thresh: 0.9 + label_list: ['0', '180'] + +Rec: + use_cuda: false + + model_path: models/ch_PP-OCRv3_rec_infer.onnx + + rec_img_shape: [3, 48, 320] + rec_batch_num: 6 diff --git a/requirements.txt b/requirements.txt index f655970..d037601 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,35 +3,14 @@ aiosqlite==0.19.0 anyio==3.7.1 APScheduler==3.10.2 arrow==1.2.3 -astor==0.8.1 asttokens==2.2.1 -attrdict==2.0.1 -Babel==2.12.1 backports.zoneinfo==0.2.1 -bce-python-sdk==0.8.87 -beautifulsoup4==4.12.2 better-exceptions==0.3.3 -blinker==1.6.2 -cachetools==5.3.1 certifi==2023.7.22 -charset-normalizer==3.2.0 -click==8.1.6 -contourpy==1.1.0 -cssselect==1.2.0 -cssutils==2.7.1 -cycler==0.11.0 -Cython==3.0.0 databases==0.6.2 -decorator==5.1.1 devtools==0.11.0 -et-xmlfile==1.1.0 exceptiongroup==1.1.2 executing==1.2.0 -fire==0.5.0 -Flask==2.3.2 -flask-babel==3.1.0 -fonttools==4.42.0 -future==0.18.3 greenlet==2.0.2 h11==0.14.0 html5tagger==1.3.0 @@ -39,80 +18,31 @@ httpcore==0.17.3 httptools==0.6.0 httpx==0.24.1 idna==3.4 -imageio==2.31.1 -imgaug==0.4.0 -importlib-metadata==6.8.0 -importlib-resources==6.0.1 -itsdangerous==2.1.2 -Jinja2==3.1.2 -kiwisolver==1.4.4 -lazy_loader==0.3 -lmdb==1.4.1 loguru==0.7.0 -lxml==4.9.3 markdown-it-py==3.0.0 -MarkupSafe==2.1.3 -matplotlib==3.7.2 mdurl==0.1.2 multidict==6.0.4 -networkx==3.1 -numpy==1.24.4 -opencv-contrib-python==4.6.0.66 -opencv-python==4.6.0.66 -openpyxl==3.1.2 -opt-einsum==3.3.0 orjson==3.9.4 ormar==0.12.2 -packaging==23.1 -paddle-bfloat==0.1.7 -paddleocr==2.7.0.2 -paddlepaddle==2.5.1 -pandas==2.0.3 -pdf2docx==0.5.6 pepperbot==0.3.6 -Pillow==10.0.0 -premailer==3.10.0 -protobuf==4.24.0 -psutil==5.9.5 pyaes==1.6.1 -pyclipper==1.3.0.post4 -pycryptodome==3.18.0 pydantic==1.10.8 Pygments==2.16.1 -PyMuPDF==1.20.2 -pyparsing==3.0.9 Pyrogram==2.0.106 PySocks==1.7.1 python-dateutil==2.8.2 -python-docx==0.8.11 python-dotenv==1.0.0 pytz==2023.3 -PyWavelets==1.4.1 -rapidfuzz==3.2.0 -rarfile==4.0 -requests==2.31.0 rich==13.5.2 sanic==23.6.0 sanic-routing==23.6.0 -scikit-image==0.21.0 -scipy==1.10.1 -shapely==2.0.1 six==1.16.0 sniffio==1.3.0 -soupsieve==2.4.1 SQLAlchemy==1.4.41 -termcolor==2.3.0 TgCrypto==1.2.5 -tifffile==2023.7.10 -tqdm==4.66.1 tracerite==1.1.0 typing_extensions==4.7.1 -tzdata==2023.3 tzlocal==5.0.1 ujson==5.8.0 -urllib3==2.0.4 uvloop==0.17.0 -visualdl==2.5.3 websockets==11.0.3 -Werkzeug==2.3.6 -zipp==3.16.2