Upload some test files

This commit is contained in:
fuyn101 2025-05-23 19:36:44 +08:00
parent cee44523c6
commit 44782a3117
10 changed files with 912 additions and 689 deletions

BIN
depot_test/output/matches_knn_only.png (Stored with Git LFS) Normal file

Binary file not shown.

BIN
depot_test/stitched_image_multi.png (Stored with Git LFS) Normal file

Binary file not shown.

View file

@@ -0,0 +1,324 @@
import glob
import os
import re  # used to parse labels from file names
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from torchvision.datasets import ImageFolder
from tqdm import tqdm  # progress bars

# --- Configuration ---
TRAIN_DIR = "训练集"
VAL_TEST_DIR = "测试集"  # per the setup, validation and test share the same directory and file format
IMAGE_SIZE = 224  # standard MobileNetV3 input size
BATCH_SIZE = 32
NUM_EPOCHS = 20  # adjust as needed
LEARNING_RATE = 0.001  # initial learning rate
SAVE_MODEL_PATH = "mobilenetv3_small_finetuned.pth"
# Pick the device automatically
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# --- Data preprocessing and augmentation ---
# ImageNet mean and standard deviation
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
# Training-set augmentation and preprocessing
train_transforms = transforms.Compose(
    [
        transforms.RandomResizedCrop(IMAGE_SIZE),  # random crop and resize
        transforms.RandomHorizontalFlip(),  # random horizontal flip
        transforms.ToTensor(),  # convert to tensor
        transforms.Normalize(mean, std),  # normalize
    ]
)
# Validation/test preprocessing (no augmentation; just resize, center crop, normalize)
val_test_transforms = transforms.Compose(
    [
        transforms.Resize(int(IMAGE_SIZE * 256 / 224)),  # resize to a slightly larger size
        transforms.CenterCrop(IMAGE_SIZE),  # center-crop to the target size
        transforms.ToTensor(),  # convert to tensor
        transforms.Normalize(mean, std),  # normalize
    ]
)
# --- Custom validation/test Dataset ---
# A custom Dataset is needed for the "测试集/{idx}_{label}.png" file-naming scheme
class CustomValTestDataset(Dataset):
    def __init__(self, root_dir, class_to_idx, transform=None):
        """
        Args:
            root_dir (string): dataset root directory (e.g., "测试集").
            class_to_idx (dict): class-name-to-index mapping, consistent with the training set.
            transform (callable, optional): transform applied to each image.
        """
        self.root_dir = root_dir
        self.transform = transform
        self.class_to_idx = class_to_idx
        self.idx_to_class = {v: k for k, v in class_to_idx.items()}
        self.image_files = []
        self.labels = []
        # Collect every PNG file in the directory
        filepaths = glob.glob(os.path.join(root_dir, "*.png"))
        # Parse the label out of each file name
        pattern = re.compile(r"^\d+_([^_]+)\.png$")  # matches index_label.png
        for filepath in filepaths:
            filename = os.path.basename(filepath)
            match = pattern.match(filename)
            if match:
                label_name = match.group(1)
                if label_name in self.class_to_idx:
                    self.image_files.append(filepath)
                    self.labels.append(self.class_to_idx[label_name])
                else:
                    print(
                        f"Warning: label '{label_name}' in file '{filename}' is not a training-set class; skipping."
                    )
        print(f"Loaded {len(self.image_files)} validation/test images.")

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        img_path = self.image_files[idx]
        label = self.labels[idx]
        # Open the image and force RGB (handles possible grayscale inputs)
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, label
# --- Load the data ---
# ImageFolder loads the training set and derives classes from directory names
if not os.path.exists(TRAIN_DIR):
    print(
        f"Error: training directory '{TRAIN_DIR}' does not exist. Create it and add class subdirectories of images."
    )
    exit()
if not os.path.exists(VAL_TEST_DIR):
    print(f"Error: validation/test directory '{VAL_TEST_DIR}' does not exist. Create it and add images.")
    exit()
train_dataset = ImageFolder(TRAIN_DIR, transform=train_transforms)
num_classes = len(train_dataset.classes)
class_to_idx = train_dataset.class_to_idx  # class-to-index mapping
print(f"Detected {num_classes} classes in the training set: {train_dataset.classes}")
# Load the validation/test set with the custom Dataset
val_test_dataset = CustomValTestDataset(
    VAL_TEST_DIR, class_to_idx, transform=val_test_transforms
)
# Create the DataLoaders
train_loader = DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True
)  # tune num_workers to your machine
val_test_loader = DataLoader(val_test_dataset, batch_size=BATCH_SIZE, shuffle=False)
# --- Load a pretrained MobileNetV3-Small ---
# The weights argument selects the pretrained weights;
# MobileNet_V3_Small_Weights.IMAGENET1K_V1 was trained on ImageNet
try:
    model = models.mobilenet_v3_small(
        weights=models.MobileNet_V3_Small_Weights.IMAGENET1K_V1
    )
    print("Loaded pretrained MobileNetV3-Small (ImageNet weights).")
except Exception as e:
    print(f"Failed to load pretrained weights: {e}")
    print("Falling back to an uninitialized model...")
    model = models.mobilenet_v3_small(weights=None)
# --- Replace the final fully connected layer to match the class count ---
# MobileNetV3's classifier is model.classifier; the last linear layer is classifier[-1]
num_ftrs = model.classifier[-1].in_features
# Swap in a new fully connected layer
model.classifier[-1] = nn.Linear(num_ftrs, num_classes)
model = model.to(device)
# --- Loss function and optimizer ---
criterion = nn.CrossEntropyLoss()  # cross-entropy suits classification
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)  # Adam optimizer
# Optional: a learning-rate scheduler
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)  # decay the LR every 7 epochs
# --- Training and evaluation functions ---
def train_epoch(model, train_loader, criterion, optimizer, device):
    model.train()  # training mode
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    # tqdm shows a progress bar
    for inputs, labels in tqdm(train_loader, desc="Training"):
        inputs, labels = inputs.to(device), labels.to(device)
        # Zero the gradients
        optimizer.zero_grad()
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        # Statistics
        running_loss += loss.item() * inputs.size(0)  # accumulate batch loss * batch size
        _, predicted = torch.max(outputs, 1)  # predictions
        correct_predictions += (predicted == labels).sum().item()
        total_samples += labels.size(0)
    epoch_loss = running_loss / total_samples
    epoch_accuracy = correct_predictions / total_samples
    return epoch_loss, epoch_accuracy


def evaluate(model, data_loader, criterion, device, desc="Evaluating"):
    model.eval()  # evaluation mode
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    # No gradients during evaluation
    with torch.no_grad():
        # tqdm shows a progress bar
        for inputs, labels in tqdm(data_loader, desc=desc):
            inputs, labels = inputs.to(device), labels.to(device)
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Statistics
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()
            total_samples += labels.size(0)
    epoch_loss = running_loss / total_samples
    epoch_accuracy = correct_predictions / total_samples
    return epoch_loss, epoch_accuracy
# --- Training loop ---
best_val_accuracy = 0.0
print("\nStarting training...")
for epoch in range(NUM_EPOCHS):
    print(f"\n--- Epoch {epoch + 1}/{NUM_EPOCHS} ---")
    # Training phase
    train_loss, train_accuracy = train_epoch(
        model, train_loader, criterion, optimizer, device
    )
    print(
        f"Epoch {epoch + 1} train loss: {train_loss:.4f}, accuracy: {train_accuracy:.4f}"
    )
    # Optional: learning-rate scheduling
    # if scheduler is not None:
    #     scheduler.step()
    # Validation/test phase
    val_loss, val_accuracy = evaluate(
        model, val_test_loader, criterion, device, desc="Validating/testing"
    )
    print(
        f"Epoch {epoch + 1} val/test loss: {val_loss:.4f}, accuracy: {val_accuracy:.4f}"
    )
    # Save the best model so far
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        torch.save(model.state_dict(), SAVE_MODEL_PATH)
        print(
            f"Saved the best model on the validation/test set (accuracy: {best_val_accuracy:.4f}) to {SAVE_MODEL_PATH}"
        )
print("\nTraining finished!")
print(f"Best accuracy on the validation/test set: {best_val_accuracy:.4f}")
# --- Optional: load and test the best model ---
# Reload the saved best model for a final evaluation (since validation and test share one set, this is the final result)
print(f"\nLoading the best model '{SAVE_MODEL_PATH}' for final evaluation...")
loaded_model = models.mobilenet_v3_small(weights=None)  # build an empty model structure first
loaded_model.classifier[-1] = nn.Linear(
    loaded_model.classifier[-1].in_features, num_classes
)  # adjust the classifier
loaded_model.load_state_dict(
    torch.load(SAVE_MODEL_PATH, map_location=device)
)  # load the weights
loaded_model = loaded_model.to(device)
final_test_loss, final_test_accuracy = evaluate(
    loaded_model, val_test_loader, criterion, device, desc="Final testing"
)
print(
    f"\nFinal test loss: {final_test_loss:.4f}, final test accuracy: {final_test_accuracy:.4f}"
)
# --- Optional: predict a single image ---
# For example, an image named '测试集/some_image_X_label.png'
def predict_single_image(image_path, model, class_to_idx, device, transform):
    model.eval()
    idx_to_class = {v: k for k, v in class_to_idx.items()}
    try:
        img = Image.open(image_path).convert("RGB")
        img = transform(img).unsqueeze(0).to(device)  # add a batch dimension and move to the device
        with torch.no_grad():
            outputs = model(img)
            probabilities = torch.softmax(outputs, dim=1)[0]  # probability distribution
            _, predicted_idx = torch.max(probabilities, 0)  # index of the highest probability
            predicted_label = idx_to_class[predicted_idx.item()]
            confidence = probabilities[predicted_idx].item()
            print(f"\nPredicted image: {image_path}")
            print(f"Predicted class: {predicted_label}, confidence: {confidence:.4f}")
            return predicted_label, confidence
    except FileNotFoundError:
        print(f"Error: image file '{image_path}' not found.")
        return None, None
    except Exception as e:
        print(f"Error while predicting the image: {e}")
        return None, None


# Example prediction (replace with an actual file from your test set)
example_image_path = r"测试集\27_基础作战记录.png"
predict_single_image(
    example_image_path, loaded_model, class_to_idx, device, val_test_transforms
)
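
This script fine-tunes every parameter; a common variant freezes the pretrained backbone so only the new classifier head trains. A minimal sketch, reusing the `model` and `LEARNING_RATE` defined above (an assumption about setup, not part of the script):

# Sketch: freeze the pretrained feature extractor and optimize only the new head.
for param in model.features.parameters():
    param.requires_grad = False  # backbone weights stay fixed
optimizer = optim.Adam(
    (p for p in model.parameters() if p.requires_grad), lr=LEARNING_RATE
)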

View file

@@ -0,0 +1,378 @@
import json
import lzma
import os
import pickle
import cv2
import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image, ImageDraw, ImageFont, ImageOps
from sklearn.neighbors import KNeighborsClassifier
from torchvision import models

CROP_SIZE = 130
BORDER = 26
size = CROP_SIZE * 2 - BORDER * 2
# Build the feature extractor: MobileNetV3-Small up to, but excluding, the final classification layer
model = models.mobilenet_v3_small(weights="DEFAULT")
features_part = model.features
avgpool = torch.nn.AdaptiveAvgPool2d(1)
classifier_part_excluding_last = torch.nn.Sequential(
    *list(model.classifier.children())[:-1]
)
feature_extractor = torch.nn.Sequential(
    features_part,
    avgpool,
    torch.nn.Flatten(start_dim=1),
    classifier_part_excluding_last,
)
feature_extractor.eval()  # switch to evaluation mode
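# Note: for MobileNetV3-Small this pipeline maps each image to a
# 1024-dimensional vector (the output of the penultimate classifier layer),
# which is the feature representation the KNN below consumes.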
def 提取特征点(模板):
    """Extract features with MobileNetV3 (PyTorch version); expects a 3-channel BGR image array."""
    # Convert the input image from BGR to RGB
    img_rgb = cv2.cvtColor(模板, cv2.COLOR_BGR2RGB)
    # Image preprocessing pipeline
    preprocess = transforms.Compose(
        [
            transforms.ToPILImage(),  # convert to a PIL image
            transforms.Resize(250),  # resize the shorter side to 250
            transforms.ToTensor(),  # convert to tensor
            transforms.Normalize(  # normalize
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            ),
        ]
    )
    # Preprocess the image
    img_tensor = preprocess(img_rgb)
    img_tensor = img_tensor.unsqueeze(0)  # add a batch dimension
    # Extract features
    with torch.no_grad():
        features = feature_extractor(img_tensor)
    # Flatten to a 1-D vector
    features = features.flatten().numpy()
    return features
class DepotMatcher:
    def __init__(
        self,
        ref_table_json="./ArknightsGameData/zh_CN/gamedata/excel/item_table.json",
        icon_dir="./ArknightsResource/items/",
        ref_dir="depot_test/output/test/origin",
        roi_dir="depot_test/output/test/result",
        img_path=r"depot_test\stitched_image_multi.png",
    ):
        # Path configuration
        self.REF_DIR = ref_dir
        self.ROI_DIR = roi_dir
        self.IMG_PATH = img_path
        self.REF_TABLE_JSON = ref_table_json
        self.ICON_DIR = icon_dir
        # Algorithm parameters
        self.HOUGH_PARAMS = dict(
            dp=5, minDist=230, param1=50, param2=30, minRadius=90, maxRadius=100
        )
        self.CROP_SIZE = 130
        self.BORDER = 26
        # Runtime data
        self.refs = None
        self.rois = []
        self.knn_results = []
        self.knn_model = None
    def load_references(self):
        """Load the item-icon reference images (kept in color)."""
        data = json.load(open(self.REF_TABLE_JSON, encoding="utf-8"))
        self.refs = {}
        size = self.CROP_SIZE * 2 - self.BORDER * 2
        # First collect every item that has a sortId
        items_with_sort = []
        for item in data.get("items", {}).values():
            if item.get("classifyType") not in {"NORMAL", "CONSUME", "MATERIAL"}:
                continue
            path = os.path.join(self.ICON_DIR, f"{item['iconId']}.png")
            if not os.path.exists(path):
                continue
            # Keep the color image
            im = Image.open(path).resize((size, size))
            items_with_sort.append(
                {
                    "name": item["name"],
                    "array": np.array(im),
                    "sortId": item.get("sortId", 0),
                }
            )
        # Sort by sortId
        items_with_sort.sort(key=lambda x: x["sortId"])
        # Build the final refs dict
        for item in items_with_sort:
            self.refs[item["name"]] = item["array"]
        print(f"Loaded {len(self.refs)} reference images (sorted by sortId)")
        # Save the training-set images
        os.makedirs("训练集", exist_ok=True)
        for name, array in self.refs.items():
            os.makedirs(f"训练集/{name}", exist_ok=True)
            path = os.path.join(f"训练集/{name}", f"{name}.png")
            im = Image.fromarray(array)
            cropped_im = im.crop((50, 30, 160, 140))
            cropped_im.save(path)
        return self
    def _process_circle(self, idx, circle, img):
        """Process one detected circle; returns the color image data."""
        x, y, r = circle
        # Crop a larger region containing the circle
        crop = img[
            max(0, y - self.CROP_SIZE) : min(img.shape[0], y + self.CROP_SIZE),
            max(0, x - self.CROP_SIZE) : min(img.shape[1], x + self.CROP_SIZE),
        ]
        # Extract the core color ROI
        color_roi = crop[self.BORDER : -self.BORDER, self.BORDER : -self.BORDER]
        # Color region used for matching
        color_sec = color_roi
        return idx, color_sec, color_roi

    def detect_and_crop(self):
        """Detect circular regions in the screenshot and crop them."""
        img = cv2.imread(self.IMG_PATH)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, **self.HOUGH_PARAMS)
        if circles is None:
            print("No circular regions detected")
            return self
        # Process the detected circles
        circles = np.round(circles[0]).astype(int)
        circles = sorted(circles, key=lambda c: (c[0], c[1]))  # sort by x, then y
        self.rois = []
        for idx, circle in enumerate(circles):
            result = self._process_circle(idx, circle, img)
            self.rois.append(result)
        return self
    def 训练并保存knn模型(self, images, labels, filename):
        """Train a KNN model and save it."""
        knn_classifier = KNeighborsClassifier(
            weights="distance", n_neighbors=1, n_jobs=1
        )
        knn_classifier.fit(images, labels)
        with lzma.open(filename, "wb") as f:
            pickle.dump(knn_classifier, f)
        return knn_classifier

    def 训练knn模型(self, 模型保存路径="depot_knn_model.xz"):
        """Train and save the KNN model (using the color images)."""
        # Prepare the training data
        images = []
        labels = []
        for name, img_array in self.refs.items():
            features = 提取特征点(img_array)
            images.append(features)
            labels.append(name)
        # Train the model
        self.knn_model = self.训练并保存knn模型(images, labels, 模型保存路径)
        print(f"KNN model trained and saved to: {模型保存路径}")
        return self

    def 使用knn预测(self, 测试图像):
        features = 提取特征点(测试图像)
        # Predict
        预测结果 = self.knn_model.predict([features])
        return 预测结果[0]
    def match_items_knn_only(
        self,
        knn_model_path="depot_knn_model.xz",
    ):
        """Match items using KNN only."""
        self.knn_results = []
        newstart = None
        with lzma.open(knn_model_path, "rb") as f:
            self.knn_model = pickle.load(f)
        os.makedirs("测试集", exist_ok=True)
        for idx, color_sec_np, _ in self.rois:
            # KNN prediction (the feature extractor expects a 3-channel BGR image,
            # so the color crop is passed directly rather than a grayscale conversion)
            knn_name = self.使用knn预测(color_sec_np)
            self.knn_results.append((idx, knn_name))
            os.makedirs(f"测试集/{knn_name}", exist_ok=True)
            Image.fromarray(cv2.cvtColor(color_sec_np, cv2.COLOR_BGR2RGB)).crop(
                (50, 30, 160, 140)
            ).save(os.path.join(f"测试集/{knn_name}", f"{idx}_{knn_name}.png"))
            # Track the most recent prediction
            newstart = knn_name
            print(f"ROI {idx}: Hog+Knn={knn_name}, newstart={newstart}")
        return self
    def display_results(self):
        """Visualize the matching results."""
        ROW_LIMIT = 9
        # Use one reference image's size as the basis for the blank placeholder
        blank_ref_np = next(iter(self.refs.values()))
        blank_img_pil = Image.new(
            "RGB", (blank_ref_np.shape[1], blank_ref_np.shape[0]), (200, 200, 200)
        )
        combined_images = []
        current_row_images = []
        current_row_width = 0
        max_row_height = 0
        for idx, color_sec_np, color_roi_data in self.rois:
            color_roi_data = Image.fromarray(
                cv2.cvtColor(color_roi_data, cv2.COLOR_BGR2RGB)
            )
            color_sec_np = Image.fromarray(
                cv2.cvtColor(color_sec_np, cv2.COLOR_BGR2RGB)
            )
            # Fetch the KNN match result
            k_res_details = next(
                (d for d in getattr(self, "knn_results", []) if d[0] == idx), None
            )
            k_res_name = k_res_details[1] if k_res_details else None
            k_ref_img = (
                Image.fromarray(self.refs[k_res_name]).convert("RGB")
                if k_res_name and k_res_name in self.refs
                else blank_img_pil.copy()
            )
            # Compute the combined size
            combined_width = color_roi_data.width + color_sec_np.width + k_ref_img.width
            combined_height = max(
                color_roi_data.height,
                color_sec_np.height,
                k_ref_img.height,
            )
            # Create the combined image
            combined = Image.new(
                "RGB", (combined_width, combined_height), (255, 255, 255)
            )
            x_offset = 0
            # Paste each part
            combined.paste(color_roi_data, (x_offset, 0))
            x_offset += color_roi_data.width
            combined.paste(color_sec_np, (x_offset, 0))
            x_offset += color_sec_np.width
            combined.paste(k_ref_img, (x_offset, 0))
            x_offset += k_ref_img.width
            # Add the label
            draw = ImageDraw.Draw(combined)
            font = ImageFont.truetype("msyh.ttc", 16)
            label = f"ROI {idx}\nHog+Knn: {k_res_name or 'None'}"
            text_color = (0, 0, 0)
            draw.text(
                (color_roi_data.width, color_sec_np.height),
                label,
                fill=text_color,
                font=font,
            )
            # Add a border
            combined_bordered = ImageOps.expand(combined, border=2, fill=(0, 0, 0))
            current_row_images.append(combined_bordered)
            current_row_width += combined_bordered.width
            max_row_height = max(max_row_height, combined_bordered.height)
            # Start a new row when the limit is reached
            if len(current_row_images) == ROW_LIMIT:
                row_img = Image.new(
                    "RGB", (current_row_width, max_row_height), (255, 255, 255)
                )
                x = 0
                for img in current_row_images:
                    row_img.paste(img, (x, 0))
                    x += img.width
                combined_images.append(row_img)
                current_row_images = []
                current_row_width = 0
                max_row_height = 0
        # Handle the last row
        if current_row_images:
            row_img = Image.new(
                "RGB", (current_row_width, max_row_height), (255, 255, 255)
            )
            x = 0
            for img in current_row_images:
                row_img.paste(img, (x, 0))
                x += img.width
            combined_images.append(row_img)
        # Assemble the final image
        if combined_images:
            total_height = sum(img.height for img in combined_images)
            max_width = max(img.width for img in combined_images)
            final_img = Image.new("RGB", (max_width, total_height), (255, 255, 255))
            y = 0
            for img in combined_images:
                final_img.paste(img, (0, y))
                y += img.height
            output_path = "depot_test/output/matches_knn_only.png"
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            final_img.save(output_path)
            print(f"Result image saved to: {output_path}")
        return self
if __name__ == "__main__":
    # Usage example
    matcher = DepotMatcher()
    matcher.load_references()
    matcher.训练knn模型()
    from datetime import datetime

    now_time = datetime.now()
    matcher.detect_and_crop()
    matcher.match_items_knn_only()
    print(datetime.now() - now_time)
    matcher.display_results()
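
Once depot_knn_model.xz exists, the classifier can be reused without re-running detection. A minimal sketch, assuming this file is importable as `depot_matcher` (the module name is a hypothetical, since the diff does not show the file name) and that a cropped BGR item image is available:

import lzma
import pickle
import cv2
from depot_matcher import 提取特征点  # hypothetical module name

with lzma.open("depot_knn_model.xz", "rb") as f:  # path used by 训练knn模型 above
    knn = pickle.load(f)
crop = cv2.imread("item_crop.png")  # hypothetical cropped item image (BGR)
print(knn.predict([提取特征点(crop)])[0])  # predicted item name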

View file

@@ -1,188 +0,0 @@
import json
import os
from datetime import datetime
from multiprocessing import Pool
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageOps
from skimage.metrics import structural_similarity

now = datetime.now()
# Path configuration
REF_DIR = r"depot_test\output/test/origin"
ROI_DIR = r"depot_test\output/test/result"
IMG_PATH = r"depot_test\result_refined.png"
REF_TABLE_JSON = "./ArknightsGameData/zh_CN/gamedata/excel/item_table.json"
ICON_DIR = "./ArknightsResource/items/"
# SSIM threshold
SSIM_THRESHOLD = 0.01
# Circle-detection parameters
HOUGH_PARAMS = dict(
    dp=5, minDist=230, param1=50, param2=30, minRadius=90, maxRadius=100
)
CROP_SIZE = 130
BORDER = 26
SECONDARY_SLICE = (slice(30, 140), slice(50, 160))
def load_references(table_json, icon_dir):
    data = json.load(open(table_json, encoding="utf-8"))
    refs = {}
    size = CROP_SIZE * 2 - BORDER * 2
    for item in data.get("items", {}).values():
        t = item.get("classifyType")
        if t not in {"NORMAL", "CONSUME", "MATERIAL"}:
            continue
        path = os.path.join(icon_dir, f"{item['iconId']}.png")
        if not os.path.exists(path):
            continue
        im = Image.open(path).resize((size, size)).crop((50, 30, 160, 140)).convert("L")
        refs[item["name"]] = np.array(im)
    print(f"Loaded {len(refs)} reference images, stored under {REF_DIR}")
    return refs
def process_circle(idx, circle, img, rois, size, dr):
    x, y, r = circle
    crop = img[
        max(0, y - CROP_SIZE) : min(img.shape[0], y + CROP_SIZE),
        max(0, x - CROP_SIZE) : min(img.shape[1], x + CROP_SIZE),
    ]
    c = crop[BORDER:-BORDER, BORDER:-BORDER]
    sec = c[SECONDARY_SLICE[0], SECONDARY_SLICE[1]]
    gray_sec = cv2.cvtColor(sec, cv2.COLOR_BGR2GRAY)
    # Save the ROI image
    os.makedirs(ROI_DIR, exist_ok=True)
    roi_path = os.path.join(ROI_DIR, f"roi_{idx}.png")
    cv2.imwrite(roi_path, gray_sec)
    rois.append(gray_sec)  # note: has no effect in the parent process when run via Pool
    return idx, gray_sec, roi_path


def detect_and_crop(image_path):
    img = cv2.imread(image_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, **HOUGH_PARAMS)
    if circles is None:
        print("No circular regions detected")
        return []
    circles = np.round(circles[0]).astype(int)
    rois = []
    size = CROP_SIZE * 2 - BORDER * 2
    dr = img.max() - img.min()
    # Use multiprocessing to speed things up
    with Pool() as pool:
        results = pool.starmap(
            process_circle,
            [(idx, circle, img, rois, size, dr) for idx, circle in enumerate(circles)],
        )
    return [roi for idx, roi, path in results]
def process_match(idx, roi, refs, thresh):
    best, score = "Unknown", -1
    dr = roi.max() - roi.min()
    for name, ref in refs.items():
        if roi.shape != ref.shape:
            continue
        s = structural_similarity(roi, ref, data_range=dr)
        if s > score:
            best, score = name, s
    if score >= thresh:
        return idx, best, score
    return idx, None, score


def match_ssim(rois, refs, thresh=SSIM_THRESHOLD):
    from multiprocessing import Pool

    args = [(idx, roi, refs, thresh) for idx, roi in enumerate(rois)]
    with Pool(processes=5) as pool:
        results = pool.starmap(process_match, args)
    stats = {}
    match_idx = {}
    for idx, name, score in results:
        if name:
            stats[name] = stats.get(name, 0) + 1
            match_idx[idx] = name
        print(f"ROI {idx} match: {name if name else 'Unknown'} (SSIM={score:.3f})")
    return stats, match_idx
def display_matches(rois, match_idx, refs):
    ROW_LIMIT = 10  # at most 10 per row
    blank_ref = next(iter(refs.values()))  # borrow one reference image's size
    blank_img = Image.new(
        "RGB", (blank_ref.shape[1], blank_ref.shape[0]), (200, 200, 200)
    )  # gray placeholder
    combined_images = []
    row_images = []
    row_width = 0
    max_height = 0
    for idx in range(len(rois)):
        roi_img = Image.fromarray(rois[idx]).convert("RGB")
        ref_name = match_idx.get(idx)
        if ref_name:
            ref_img = Image.fromarray(refs[ref_name]).convert("RGB")
        else:
            ref_img = blank_img.copy()
        combined_width = roi_img.width + ref_img.width
        combined_height = max(roi_img.height, ref_img.height)
        combined = Image.new("RGB", (combined_width, combined_height), (255, 255, 255))
        combined.paste(roi_img, (0, 0))
        combined.paste(ref_img, (roi_img.width, 0))
        draw = ImageDraw.Draw(combined)
        font = ImageFont.truetype("msyh.ttc", 20)
        label = f"ROI {idx}: {ref_name if ref_name else 'Unknown'}"
        draw.text((5, 5), label, fill=(255, 0, 0), font=font)
        combined = ImageOps.expand(combined, border=2, fill=(0, 0, 0))
        row_images.append(combined)
        row_width += combined_width
        max_height = max(max_height, combined_height)
        if len(row_images) == ROW_LIMIT or idx == len(rois) - 1:
            row_img = Image.new("RGB", (row_width, max_height), (255, 255, 255))
            x_offset = 0
            for img in row_images:
                row_img.paste(img, (x_offset, 0))
                x_offset += img.width
            combined_images.append(row_img)
            row_images = []
            row_width = 0
            max_height = 0
    total_width = max(img.width for img in combined_images)
    total_height = sum(img.height for img in combined_images)
    final_img = Image.new("RGB", (total_width, total_height), (255, 255, 255))
    y_offset = 0
    for img in combined_images:
        final_img.paste(img, (0, y_offset))
        y_offset += img.height
    final_img.save("all_matches.png")


if __name__ == "__main__":
    refs = load_references(REF_TABLE_JSON, ICON_DIR)
    rois = detect_and_crop(IMG_PATH)
    res, match_idx = match_ssim(rois, refs)
    print("Final recognition results:", res)
    display_matches(rois, match_idx, refs)
    print(datetime.now() - now)

View file

@@ -1,309 +0,0 @@
import json
import os
from datetime import datetime
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageOps
from skimage.metrics import structural_similarity as ssim

# Path configuration
REF_DIR = r"depot_test\output/test/origin"
ROI_DIR = r"depot_test\output/test/result"
IMG_PATH = r"depot_test\output\result_template.png"
REF_TABLE_JSON = "./ArknightsGameData/zh_CN/gamedata/excel/item_table.json"
ICON_DIR = "./ArknightsResource/items/"
# Parameters
HOUGH_PARAMS = dict(
    dp=5, minDist=230, param1=50, param2=30, minRadius=90, maxRadius=100
)
CROP_SIZE = 130
BORDER = 26
SECONDARY_SLICE = (slice(30, 140), slice(50, 160))
def load_references(table_json, icon_dir):
    data = json.load(open(table_json, encoding="utf-8"))
    refs = {}
    size = CROP_SIZE * 2 - BORDER * 2
    for item in data.get("items", {}).values():
        if item.get("classifyType") not in {"NORMAL", "CONSUME", "MATERIAL"}:
            continue
        path = os.path.join(icon_dir, f"{item['iconId']}.png")
        if not os.path.exists(path):
            continue
        im = Image.open(path).resize((size, size)).crop((50, 30, 160, 140)).convert("L")
        refs[item["name"]] = np.array(im)
    print(f"Loaded {len(refs)} reference images")
    return refs
def process_circle(idx, circle, img):
    x, y, r = circle
    crop = img[
        max(0, y - CROP_SIZE) : min(img.shape[0], y + CROP_SIZE),
        max(0, x - CROP_SIZE) : min(img.shape[1], x + CROP_SIZE),
    ]
    # Save the color ROI
    os.makedirs(ROI_DIR, exist_ok=True)
    color_roi_path = os.path.join(ROI_DIR, f"color_roi_{idx}.png")
    cv2.imwrite(color_roi_path, crop[BORDER:-BORDER, BORDER:-BORDER])
    c = crop[BORDER:-BORDER, BORDER:-BORDER]
    sec = c[SECONDARY_SLICE[0], SECONDARY_SLICE[1]]
    gray_sec = cv2.cvtColor(sec, cv2.COLOR_BGR2GRAY)
    roi_path = os.path.join(ROI_DIR, f"roi_{idx}.png")
    cv2.imwrite(roi_path, gray_sec)
    return idx, gray_sec, roi_path, color_roi_path


def detect_and_crop(image_path):
    img = cv2.imread(image_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, **HOUGH_PARAMS)
    if circles is None:
        print("No circular regions detected")
        return []
    # Round the circle coordinates and sort by x, then y
    circles = np.round(circles[0]).astype(int)
    circles = sorted(circles, key=lambda c: (c[0], c[1]))
    results = []
    for idx, circle in enumerate(circles):
        result = process_circle(idx, circle, img)
        results.append(result)
    return results
def match_template(rois, refs, thresh):
    results = []
    for idx, roi, _, _ in rois:
        best, max_val = "Unknown", -1.0
        roi_f = roi.astype(np.float32) / 255.0
        for name, ref in refs.items():
            ref_f = ref.astype(np.float32) / 255.0
            if roi_f.shape != ref_f.shape:
                continue
            res = cv2.matchTemplate(roi_f, ref_f, cv2.TM_CCOEFF_NORMED)
            val = float(res.max())
            if val > max_val:
                best, max_val = name, val
        if max_val >= thresh:
            results.append((idx, best, max_val))
        else:
            results.append((idx, None, max_val))
    return results
def match_ssim(rois, refs, thresh):
    results = []
    for idx, roi, _, _ in rois:
        best_match = "Unknown"
        max_combined_score = -1.0
        best_ssim = 0
        best_hist = 0
        best_edge = 0
        # Preprocess the ROI
        roi_gray = roi if len(roi.shape) == 2 else cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        roi_edges = cv2.Canny(roi_gray, 50, 150)
        roi_hist = cv2.calcHist([roi_gray], [0], None, [256], [0, 256])
        cv2.normalize(roi_hist, roi_hist, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
        for name, ref in refs.items():
            if roi_gray.shape != ref.shape:
                continue
            # 1. SSIM similarity
            ssim_score, _ = ssim(roi_gray, ref, full=True)
            # 2. Histogram similarity
            ref_hist = cv2.calcHist([ref], [0], None, [256], [0, 256])
            cv2.normalize(
                ref_hist, ref_hist, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX
            )
            hist_score = cv2.compareHist(roi_hist, ref_hist, cv2.HISTCMP_CORREL)
            # 3. Edge similarity
            ref_edges = cv2.Canny(ref, 50, 150)
            edge_intersection = np.sum(roi_edges * ref_edges)
            edge_union = np.sum(roi_edges + ref_edges)
            edge_score = edge_intersection / edge_union if edge_union > 0 else 0
            # Weighted combined score (weights are tunable)
            combined_score = 0.6 * ssim_score + 0.2 * hist_score + 0.2 * edge_score
            if combined_score > max_combined_score:
                best_match = name
                max_combined_score = combined_score
                best_ssim = ssim_score
                best_hist = hist_score
                best_edge = edge_score
        # Dynamic threshold adjustment (based on image complexity)
        roi_complexity = np.std(roi_gray) / 255.0
        dynamic_thresh = thresh * (1 + 0.3 * roi_complexity)
        if max_combined_score >= dynamic_thresh:
            results.append(
                (idx, best_match, max_combined_score, best_ssim, best_hist, best_edge)
            )
        else:
            results.append(
                (idx, None, max_combined_score, best_ssim, best_hist, best_edge)
            )
    return results
def display_matches(rois, template_results, ssim_results, refs):
    ROW_LIMIT = 9
    blank_ref = next(iter(refs.values()))  # borrow one reference image's size
    blank_img = Image.new(
        "RGB", (blank_ref.shape[1], blank_ref.shape[0]), (200, 200, 200)
    )  # gray placeholder
    combined_images = []
    row_images = []
    row_width = 0
    max_height = 0
    for item in rois:
        idx, _, _, color_path = item
        # Load the color ROI image
        roi_img = Image.open(color_path).convert("RGB")
        # Template-matching result
        t_res = next((name for i, name, val in template_results if i == idx), None)
        t_val = next((val for i, name, val in template_results if i == idx), 0)
        if t_res is not None:
            t_ref_img = Image.fromarray(refs[t_res]).convert("RGB")
        else:
            t_ref_img = blank_img.copy()
        # SSIM result (the tuples carry six fields, so the extras are ignored here)
        s_res = next((name for i, name, val, *_ in ssim_results if i == idx), None)
        s_val = next((val for i, name, val, *_ in ssim_results if i == idx), 0)
        if s_res is not None:
            s_ref_img = Image.fromarray(refs[s_res]).convert("RGB")
        else:
            s_ref_img = blank_img.copy()
        # Combine the ROI (left), template-matching reference (middle), and SSIM reference (right)
        combined_width = roi_img.width + t_ref_img.width + s_ref_img.width
        combined_height = max(roi_img.height, t_ref_img.height, s_ref_img.height)
        combined = Image.new("RGB", (combined_width, combined_height), (255, 255, 255))
        combined.paste(roi_img, (0, 0))
        combined.paste(t_ref_img, (roi_img.width, 0))
        combined.paste(s_ref_img, (roi_img.width + t_ref_img.width, 0))
        draw = ImageDraw.Draw(combined)
        font = ImageFont.truetype("msyh.ttc", 20)
        # Pull the detailed scores out of the SSIM results
        ssim_details = next(
            (d for d in ssim_results if d[0] == idx),
            (idx, None, 0, 0, 0, 0),  # defaults if not found
        )
        best_ssim = ssim_details[3] if len(ssim_details) > 3 else 0
        best_hist = ssim_details[4] if len(ssim_details) > 4 else 0
        best_edge = ssim_details[5] if len(ssim_details) > 5 else 0
        label = (
            f"ROI {idx} {best_ssim:.3f} {best_hist:.3f} {best_edge:.3f}\n"
            f"T({t_res if t_res else 'None'}, {t_val:.3f})\n"
            f"S({s_res if s_res else 'None'}, {s_val:.3f})"
        )
        if t_res == s_res:
            draw.text(
                (roi_img.width, t_ref_img.height), label, fill=(255, 0, 0), font=font
            )
        else:
            draw.text(
                (roi_img.width, t_ref_img.height), label, fill=(255, 0, 255), font=font
            )
        combined = ImageOps.expand(combined, border=2, fill=(0, 0, 0))
        row_images.append(combined)
        row_width += combined_width
        max_height = max(max_height, combined_height)
        if len(row_images) == ROW_LIMIT or idx == len(rois) - 1:
            row_img = Image.new("RGB", (row_width, max_height), (255, 255, 255))
            x_offset = 0
            for img in row_images:
                row_img.paste(img, (x_offset, 0))
                x_offset += img.width
            combined_images.append(row_img)
            row_images = []
            row_width = 0
            max_height = 0
    total_width = max(img.width for img in combined_images)
    total_height = sum(img.height for img in combined_images)
    final_img = Image.new("RGB", (total_width, total_height), (255, 255, 255))
    y_offset = 0
    for img in combined_images:
        final_img.paste(img, (0, y_offset))
        y_offset += img.height
    final_img.save("depot_test/output/matches_all.png")
if __name__ == "__main__":
    refs = load_references(REF_TABLE_JSON, ICON_DIR)
    rois = detect_and_crop(IMG_PATH)
    now = datetime.now()
    Template_MATCH_THRESHOLD = 0.2
    print("\n=== Template matching ===")
    results_template = match_template(rois, refs, Template_MATCH_THRESHOLD)
    print("\nTemplate matching took:", datetime.now() - now)
    SSIM_MATCH_THRESHOLD = 0.05
    now = datetime.now()
    print("\n=== SSIM ===")
    results_ssim = match_ssim(rois, refs, SSIM_MATCH_THRESHOLD)
    print("\nSSIM matching took:", datetime.now() - now)
    print("\n=== Comparison of results ===")
    for idx, _, _, _ in rois:
        # Template-matching results
        t_res = next((name for i, name, val in results_template if i == idx), None)
        t_val = next((val for i, name, val in results_template if i == idx), 0)
        # SSIM results, including the detailed metrics
        s_res = next(
            (name for i, name, val, ssim, hist, edge in results_ssim if i == idx), None
        )
        s_val = next(
            (val for i, name, val, ssim, hist, edge in results_ssim if i == idx), 0
        )
        s_ssim = next(
            (ssim for i, name, val, ssim, hist, edge in results_ssim if i == idx), 0
        )
        s_hist = next(
            (hist for i, name, val, ssim, hist, edge in results_ssim if i == idx), 0
        )
        s_edge = next(
            (edge for i, name, val, ssim, hist, edge in results_ssim if i == idx), 0
        )
        print(
            f"ROI {idx}:\n"
            f"  Template=({t_res if t_res else 'None'}, {t_val:.3f})\n"
            f"  SSIM=({s_res if s_res else 'None'}, {s_val:.3f})\n"
            f"  Details: SSIM={s_ssim:.3f}, Hist={s_hist:.3f}, Edge={s_edge:.3f}"
        )
    # Display the matches side by side
    display_matches(rois, results_template, results_ssim, refs)

View file

@@ -1,192 +0,0 @@
import json
import os
from datetime import datetime
from multiprocessing import Pool
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageOps

now = datetime.now()
# Path configuration
REF_DIR = r"depot_test\output/test/origin"
ROI_DIR = r"depot_test\output/test/result"
IMG_PATH = r"depot_test\stitched_image_multi.png"
REF_TABLE_JSON = "./ArknightsGameData/zh_CN/gamedata/excel/item_table.json"
ICON_DIR = "./ArknightsResource/items/"
# Match threshold
MATCH_THRESHOLD = 0.01
# Circle-detection parameters
HOUGH_PARAMS = dict(
    dp=5, minDist=230, param1=50, param2=30, minRadius=90, maxRadius=100
)
CROP_SIZE = 130
BORDER = 26
SECONDARY_SLICE = (slice(30, 140), slice(50, 160))
def load_references(table_json, icon_dir):
    data = json.load(open(table_json, encoding="utf-8"))
    refs = {}
    size = CROP_SIZE * 2 - BORDER * 2
    for item in data.get("items", {}).values():
        t = item.get("classifyType")
        if t not in {"NORMAL", "CONSUME", "MATERIAL"}:
            continue
        path = os.path.join(icon_dir, f"{item['iconId']}.png")
        if not os.path.exists(path):
            continue
        im = Image.open(path).resize((size, size)).crop((50, 30, 160, 140)).convert("L")
        refs[item["name"]] = np.array(im)
    print(f"Loaded {len(refs)} reference images, stored under {REF_DIR}")
    return refs
def process_circle(idx, circle, img, rois, size, dr):
    x, y, r = circle
    crop = img[
        max(0, y - CROP_SIZE) : min(img.shape[0], y + CROP_SIZE),
        max(0, x - CROP_SIZE) : min(img.shape[1], x + CROP_SIZE),
    ]
    c = crop[BORDER:-BORDER, BORDER:-BORDER]
    sec = c[SECONDARY_SLICE[0], SECONDARY_SLICE[1]]
    gray_sec = cv2.cvtColor(sec, cv2.COLOR_BGR2GRAY)
    # Save the ROI image
    os.makedirs(ROI_DIR, exist_ok=True)
    roi_path = os.path.join(ROI_DIR, f"roi_{idx}.png")
    cv2.imwrite(roi_path, gray_sec)
    rois.append(gray_sec)  # note: has no effect in the parent process when run via Pool
    return idx, gray_sec, roi_path


def detect_and_crop(image_path):
    img = cv2.imread(image_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, **HOUGH_PARAMS)
    if circles is None:
        print("No circular regions detected")
        return []
    circles = np.round(circles[0]).astype(int)
    rois = []
    size = CROP_SIZE * 2 - BORDER * 2
    dr = img.max() - img.min()
    # Use multiprocessing to speed things up
    with Pool() as pool:
        results = pool.starmap(
            process_circle,
            [(idx, circle, img, rois, size, dr) for idx, circle in enumerate(circles)],
        )
    return [roi for idx, roi, path in results]
def process_match(idx, roi, refs, thresh):
    best, max_val = "Unknown", -1.0
    # Template matching needs float images
    roi_f = roi.astype(np.float32) / 255.0
    for name, ref in refs.items():
        # ref is already a grayscale NumPy array
        ref_f = ref.astype(np.float32) / 255.0
        if roi_f.shape != ref_f.shape:
            continue
        # TM_CCOEFF_NORMED: the closer the result is to 1, the better the match
        res = cv2.matchTemplate(roi_f, ref_f, cv2.TM_CCOEFF_NORMED)
        val = float(res.max())
        if val > max_val:
            best, max_val = name, val
    if max_val >= thresh:
        return idx, best, max_val
    return idx, None, max_val


def match_ssim(rois, refs, thresh=MATCH_THRESHOLD):
    from multiprocessing import Pool

    args = [(idx, roi, refs, thresh) for idx, roi in enumerate(rois)]
    with Pool(processes=5) as pool:
        results = pool.starmap(process_match, args)
    stats = {}
    match_idx = {}
    for idx, name, score in results:
        if name:
            stats[name] = stats.get(name, 0) + 1
            match_idx[idx] = name
        print(f"ROI {idx} match: {name if name else 'Unknown'} (score={score:.3f})")
    return stats, match_idx
def display_matches(rois, match_idx, refs):
    ROW_LIMIT = 10  # at most 10 per row
    blank_ref = next(iter(refs.values()))  # borrow one reference image's size
    blank_img = Image.new(
        "RGB", (blank_ref.shape[1], blank_ref.shape[0]), (200, 200, 200)
    )  # gray placeholder
    combined_images = []
    row_images = []
    row_width = 0
    max_height = 0
    for idx in range(len(rois)):
        roi_img = Image.fromarray(rois[idx]).convert("RGB")
        ref_name = match_idx.get(idx)
        if ref_name:
            ref_img = Image.fromarray(refs[ref_name]).convert("RGB")
        else:
            ref_img = blank_img.copy()
        combined_width = roi_img.width + ref_img.width
        combined_height = max(roi_img.height, ref_img.height)
        combined = Image.new("RGB", (combined_width, combined_height), (255, 255, 255))
        combined.paste(roi_img, (0, 0))
        combined.paste(ref_img, (roi_img.width, 0))
        draw = ImageDraw.Draw(combined)
        font = ImageFont.truetype("msyh.ttc", 20)
        label = f"ROI {idx}: {ref_name if ref_name else 'Unknown'}"
        draw.text((5, 5), label, fill=(255, 0, 0), font=font)
        combined = ImageOps.expand(combined, border=2, fill=(0, 0, 0))
        row_images.append(combined)
        row_width += combined_width
        max_height = max(max_height, combined_height)
        if len(row_images) == ROW_LIMIT or idx == len(rois) - 1:
            row_img = Image.new("RGB", (row_width, max_height), (255, 255, 255))
            x_offset = 0
            for img in row_images:
                row_img.paste(img, (x_offset, 0))
                x_offset += img.width
            combined_images.append(row_img)
            row_images = []
            row_width = 0
            max_height = 0
    total_width = max(img.width for img in combined_images)
    total_height = sum(img.height for img in combined_images)
    final_img = Image.new("RGB", (total_width, total_height), (255, 255, 255))
    y_offset = 0
    for img in combined_images:
        final_img.paste(img, (0, y_offset))
        y_offset += img.height
    final_img.save("all_matches.png")


if __name__ == "__main__":
    refs = load_references(REF_TABLE_JSON, ICON_DIR)
    rois = detect_and_crop(IMG_PATH)
    res, match_idx = match_ssim(rois, refs)
    print("Final recognition results:", res)
    display_matches(rois, match_idx, refs)
    print(datetime.now() - now)

depot_test/训练.py Normal file
View file

@@ -0,0 +1,130 @@
import os
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm


# 1. Network definition
class SiameseNetwork(nn.Module):
    def __init__(self):
        super(SiameseNetwork, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=5),  # (3,110,110) -> (32,106,106)
            nn.ReLU(),
            nn.MaxPool2d(2),  # -> (32,53,53)
            nn.Conv2d(32, 64, kernel_size=5),  # -> (64,49,49)
            nn.ReLU(),
            nn.MaxPool2d(2),  # -> (64,24,24)
        )
        self.fc = nn.Sequential(
            nn.Linear(64 * 24 * 24, 512), nn.ReLU(), nn.Linear(512, 128)
        )

    def forward_once(self, x):
        x = self.cnn(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

    def forward(self, input1, input2):
        output1 = self.forward_once(input1)
        output2 = self.forward_once(input2)
        return output1, output2
# 2. Contrastive Loss
class ContrastiveLoss(nn.Module):
    def __init__(self, margin=2.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, out1, out2, label):
        dist = F.pairwise_distance(out1, out2)
        loss = label * torch.pow(dist, 2) + (1 - label) * torch.pow(
            torch.clamp(self.margin - dist, min=0.0), 2
        )
        return loss.mean()
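# The forward pass above is the classic contrastive objective: with y = 1 for
# same-class pairs and d the pairwise Euclidean distance,
#     L(y, d) = y * d^2 + (1 - y) * max(margin - d, 0)^2
# so similar pairs are pulled together and dissimilar pairs are pushed apart
# until they sit at least `margin` away from each other.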
# 3. Dataset builder
class SiameseDataset(Dataset):
    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.classes = os.listdir(folder_path)
        self.transform = transform or transforms.ToTensor()

    def __getitem__(self, index):
        # Pick a positive (same-class) pair half the time, otherwise a negative pair
        class1 = random.choice(self.classes)
        class2 = (
            class1
            if random.random() < 0.5
            else random.choice([c for c in self.classes if c != class1])
        )
        img1_path = os.path.join(
            self.folder_path,
            class1,
            random.choice(os.listdir(os.path.join(self.folder_path, class1))),
        )
        img2_path = os.path.join(
            self.folder_path,
            class2,
            random.choice(os.listdir(os.path.join(self.folder_path, class2))),
        )
        img1 = Image.open(img1_path).convert("RGB").resize((110, 110))
        img2 = Image.open(img2_path).convert("RGB").resize((110, 110))
        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)
        label = 1.0 if class1 == class2 else 0.0
        return img1, img2, torch.tensor([label], dtype=torch.float32)

    def __len__(self):
        return 5000  # number of random pairs sampled per epoch
# 4. Training entry point
def train():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    transform = transforms.Compose(
        [
            transforms.ToTensor(),
        ]
    )
    dataset = SiameseDataset("dataset/train", transform=transform)
    dataloader = DataLoader(dataset, batch_size=128, shuffle=True)
    model = SiameseNetwork().to(device)
    criterion = ContrastiveLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    for epoch in range(100):
        total_loss = 0
        for img1, img2, label in tqdm(dataloader, desc=f"Epoch {epoch + 1}"):
            img1, img2, label = img1.to(device), img2.to(device), label.to(device)
            out1, out2 = model(img1, img2)
            loss = criterion(out1, out2, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch + 1}, Loss: {total_loss / len(dataloader):.4f}")
    torch.save(model.state_dict(), "siamese_model.pth")
    print("Model saved as siamese_model.pth")


if __name__ == "__main__":
    train()

depot_test/预测.py Normal file
View file

@@ -0,0 +1,74 @@
import os
import torch
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms
from 训练 import SiameseNetwork  # must live in the same directory as the training script


# Load the model
def load_model(model_path, device):
    model = SiameseNetwork().to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    return model
# Load the images of each training-set class (used as the support set)
def load_train_class_images(train_dir, transform, device):
    class_images = {}
    for class_name in os.listdir(train_dir):
        class_path = os.path.join(train_dir, class_name)
        if not os.path.isdir(class_path):
            continue
        images = []
        for img_name in os.listdir(class_path):
            img_path = os.path.join(class_path, img_name)
            image = Image.open(img_path).convert("RGB").resize((110, 110))
            image = transform(image).unsqueeze(0).to(device)  # shape: (1,3,110,110)
            images.append(image)
        if images:
            class_images[class_name] = images
    return class_images
# Inference helper: returns the most similar class name
def predict(model, test_img, class_images):
    min_dist = float("inf")
    predicted_class = None
    with torch.no_grad():
        for class_name, ref_images in class_images.items():
            for ref_img in ref_images:
                out1, out2 = model(test_img, ref_img)
                dist = F.pairwise_distance(out1, out2)
                if dist.item() < min_dist:
                    min_dist = dist.item()
                    predicted_class = class_name
    return predicted_class, min_dist
# Main inference flow
def infer():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = load_model("siamese_model.pth", device)
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    train_dir = "dataset/train"
    test_dir = "dataset/test"
    class_images = load_train_class_images(train_dir, transform, device)
    print("Starting testing...")
    for class_name in os.listdir(test_dir):
        class_path = os.path.join(test_dir, class_name)
        for img_name in os.listdir(class_path):
            img_path = os.path.join(class_path, img_name)
            img = Image.open(img_path).convert("RGB").resize((110, 110))
            img_tensor = transform(img).unsqueeze(0).to(device)
            predicted_class, dist = predict(model, img_tensor, class_images)
            print(
                f"Test Image: {img_name} | True: {class_name} | "
                f"Predicted: {predicted_class} | Distance: {dist:.4f}"
            )


if __name__ == "__main__":
    infer()
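
The same helpers also support one-off checks on a single file; a minimal sketch (the image path is a hypothetical example):

# Sketch: classify one image with the helpers above.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = load_model("siamese_model.pth", device)
transform = transforms.Compose([transforms.ToTensor()])
class_images = load_train_class_images("dataset/train", transform, device)
img = Image.open("dataset/test/some_class/0.png").convert("RGB").resize((110, 110))  # hypothetical path
label, dist = predict(model, transform(img).unsqueeze(0).to(device), class_images)
print(label, dist)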