From 7643cf2dae79f9b921fab4136618be193c08a96b Mon Sep 17 00:00:00 2001
From: Zhao Zuohong <zhbaor@zhaozuohong.vip>
Date: Mon, 14 Aug 2023 09:24:03 +0800
Subject: [PATCH] =?UTF-8?q?=E8=AF=86=E5=88=AB=E5=9F=BA=E6=8A=A5=E6=95=B0?=
 =?UTF-8?q?=E6=8D=AE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 apps/riic_report_analysis.py | 53 +++++++++++++++++++++++----
 rapid_ocr.yml                | 40 +++++++++++++++++++++
 requirements.txt             | 70 ------------------------------------
 3 files changed, 86 insertions(+), 77 deletions(-)
 create mode 100644 rapid_ocr.yml

diff --git a/apps/riic_report_analysis.py b/apps/riic_report_analysis.py
index d97bc4f..917bff4 100644
--- a/apps/riic_report_analysis.py
+++ b/apps/riic_report_analysis.py
@@ -1,11 +1,14 @@
 from pepperbot.core.message.chain import MessageChain, Image, Text
-from paddleocr import PaddleOCR
+from rapidocr_onnxruntime import RapidOCR
 import os
 import asyncio
 import functools
+import re
 
 
-ocr = PaddleOCR(use_angle_cls=False, use_gpu=False)
+# Shared OCR engine, built once at import time from the project config.
+# NOTE(review): the relative path presumably resolves against the CWD.
+rapid_ocr = RapidOCR("rapid_ocr.yml")
 
 
 class RIICReportAnalysis:
@@ -14,14 +17,69 @@ class RIICReportAnalysis:
             return
         img_seg: Image = chain[0]
         img_path = await img_seg.download()
-        print(f"Image saved to {img_path}")
 
         loop = asyncio.get_running_loop()
 
-        ocr_text = await loop.run_in_executor(
-            None, functools.partial(ocr.ocr, img_path, cls=False)
-        )
-        print(ocr_text)
+        try:
+            # Run the OCR call in the default executor, off the event loop.
+            result, _ = await loop.run_in_executor(
+                None, functools.partial(rapid_ocr, img_path)
+            )
+        finally:
+            # The download is only needed as OCR input: delete it on every
+            # path so early returns and errors do not leak temp files.
+            os.remove(img_path)
 
-        os.remove(img_path)
-        print(f"file removed.")
+        # NOTE(review): RapidOCR appears to return no result (None) when it
+        # finds no text; confirm against the pinned rapidocr version.
+        if not result:
+            return
+        # Entries are read as entry[0][0] = (x, y) of the first box corner
+        # and entry[1] = recognised text.
+        if "副手简报" not in [entry[1] for entry in result]:
+            return
+
+        # The "龙门币" label fixes the row (y position) of the order figures.
+        lmb_height = None
+        for entry in result:
+            if entry[1] == "龙门币":
+                lmb_height = entry[0][0][1]
+                break
+
+        lmb_numbers = []
+        date_list = []
+        exp_list = []
+        gold_list = []
+        for entry in result:
+            corner_x, corner_y = int(entry[0][0][0]), entry[0][0][1]
+            text = entry[1]
+            if m := re.search(r"([0-9]+\.[0-9]+)", text):
+                date_list.append((corner_x, m.group(1)))
+            elif m := re.search(r"EXP([0-9]+)的作战记录", text):
+                exp_list.append((corner_x, m.group(1)))
+            elif m := re.search(r"([0-9]+)的贵金属", text):
+                gold_list.append((corner_x, m.group(1)))
+            elif lmb_height is not None and abs(corner_y - lmb_height) < 20:
+                if m := re.search(r"([0-9]+)", text):
+                    lmb_numbers.append((corner_x, m.group(1)))
+
+        # Order every column left to right so index i is the same day in all.
+        for column in (lmb_numbers, gold_list, exp_list, date_list):
+            column.sort(key=lambda item: item[0])
+
+        # Each day needs one date, two order figures, one gold and one EXP
+        # value; only emit days for which OCR recovered all of them.
+        days = min(
+            len(date_list), len(lmb_numbers) // 2, len(gold_list), len(exp_list)
+        )
+        if days == 0:
+            return
+
+        sections = [
+            f"【{date_list[i][1]}】\n"
+            f"💵 {lmb_numbers[2 * i][1]}订单({lmb_numbers[2 * i + 1][1]})\n"
+            f"🧈 {gold_list[i][1]}赤金\n"
+            f"📼 {exp_list[i][1]}经验"
+            for i in range(days)
+        ]
+        await chain.onebot_reply(Text("\n\n".join(sections)))
diff --git a/rapid_ocr.yml b/rapid_ocr.yml
new file mode 100644
index 0000000..3c5ec96
--- /dev/null
+++ b/rapid_ocr.yml
@@ -0,0 +1,40 @@
+Global:
+    text_score: 0.5
+    use_angle_cls: false
+    use_text_det: true
+    print_verbose: false
+    min_height: 30
+    width_height_ratio: 8
+
+Det:
+    use_cuda: false
+
+    model_path: models/ch_PP-OCRv3_det_infer.onnx
+
+    limit_side_len: 736
+    limit_type: min
+
+    thresh: 0.3
+    box_thresh: 0.5
+    max_candidates: 1000
+    unclip_ratio: 1.6
+    use_dilation: false
+    score_mode: fast
+
+Cls:
+    use_cuda: false
+
+    model_path: models/ch_ppocr_mobile_v2.0_cls_infer.onnx
+
+    cls_image_shape: [3, 48, 192]
+    cls_batch_num: 6
+    cls_thresh: 0.9
+    label_list: ['0', '180']
+
+Rec:
+    use_cuda: false
+
+    model_path: models/ch_PP-OCRv3_rec_infer.onnx
+
+    rec_img_shape: [3, 48, 320]
+    rec_batch_num: 6
diff --git a/requirements.txt b/requirements.txt
index f655970..d037601 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,35 +3,14 @@ aiosqlite==0.19.0
 anyio==3.7.1
 APScheduler==3.10.2
 arrow==1.2.3
-astor==0.8.1
 asttokens==2.2.1
-attrdict==2.0.1
-Babel==2.12.1
 backports.zoneinfo==0.2.1
-bce-python-sdk==0.8.87
-beautifulsoup4==4.12.2
 better-exceptions==0.3.3
-blinker==1.6.2
-cachetools==5.3.1
 certifi==2023.7.22
-charset-normalizer==3.2.0
-click==8.1.6
-contourpy==1.1.0
-cssselect==1.2.0
-cssutils==2.7.1
-cycler==0.11.0
-Cython==3.0.0
 databases==0.6.2
-decorator==5.1.1
 devtools==0.11.0
-et-xmlfile==1.1.0
 exceptiongroup==1.1.2
 executing==1.2.0
-fire==0.5.0
-Flask==2.3.2
-flask-babel==3.1.0
-fonttools==4.42.0
-future==0.18.3
 greenlet==2.0.2
 h11==0.14.0
 html5tagger==1.3.0
@@ -39,80 +18,31 @@ httpcore==0.17.3
 httptools==0.6.0
 httpx==0.24.1
 idna==3.4
-imageio==2.31.1
-imgaug==0.4.0
-importlib-metadata==6.8.0
-importlib-resources==6.0.1
-itsdangerous==2.1.2
-Jinja2==3.1.2
-kiwisolver==1.4.4
-lazy_loader==0.3
-lmdb==1.4.1
 loguru==0.7.0
-lxml==4.9.3
 markdown-it-py==3.0.0
-MarkupSafe==2.1.3
-matplotlib==3.7.2
 mdurl==0.1.2
 multidict==6.0.4
-networkx==3.1
-numpy==1.24.4
-opencv-contrib-python==4.6.0.66
-opencv-python==4.6.0.66
-openpyxl==3.1.2
-opt-einsum==3.3.0
 orjson==3.9.4
 ormar==0.12.2
-packaging==23.1
-paddle-bfloat==0.1.7
-paddleocr==2.7.0.2
-paddlepaddle==2.5.1
-pandas==2.0.3
-pdf2docx==0.5.6
 pepperbot==0.3.6
-Pillow==10.0.0
-premailer==3.10.0
-protobuf==4.24.0
-psutil==5.9.5
 pyaes==1.6.1
-pyclipper==1.3.0.post4
-pycryptodome==3.18.0
 pydantic==1.10.8
 Pygments==2.16.1
-PyMuPDF==1.20.2
-pyparsing==3.0.9
 Pyrogram==2.0.106
 PySocks==1.7.1
 python-dateutil==2.8.2
-python-docx==0.8.11
 python-dotenv==1.0.0
 pytz==2023.3
-PyWavelets==1.4.1
-rapidfuzz==3.2.0
-rarfile==4.0
-requests==2.31.0
 rich==13.5.2
 sanic==23.6.0
 sanic-routing==23.6.0
-scikit-image==0.21.0
-scipy==1.10.1
-shapely==2.0.1
 six==1.16.0
 sniffio==1.3.0
-soupsieve==2.4.1
 SQLAlchemy==1.4.41
-termcolor==2.3.0
 TgCrypto==1.2.5
-tifffile==2023.7.10
-tqdm==4.66.1
 tracerite==1.1.0
 typing_extensions==4.7.1
-tzdata==2023.3
 tzlocal==5.0.1
 ujson==5.8.0
-urllib3==2.0.4
 uvloop==0.17.0
-visualdl==2.5.3
 websockets==11.0.3
-Werkzeug==2.3.6
-zipp==3.16.2