rknn替换,板子是3568的

This commit is contained in:
琉璃月光
2025-11-03 16:10:50 +08:00
parent d3a5cbfad0
commit 5b29081c06
23 changed files with 799 additions and 44 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 733 KiB

BIN
ailai_pc/detect.pt Normal file

Binary file not shown.

BIN
ailai_pc/detect1.pt Normal file

Binary file not shown.

BIN
ailai_pc/detect2.pt Normal file

Binary file not shown.

View File

@ -1,60 +1,70 @@
# detect_pt.py
import cv2
import torch
from ultralytics import YOLO
from ultralytics.utils.ops import non_max_suppression
import torch
import cv2
# ======================
# 配置参数
# ======================
MODEL_PATH = 'best.pt' # 你的训练模型路径yolov8n.pt 或你自己训练的)
#IMG_PATH = '/home/hx/开发/ailai_image_obb/ailai_pc/train/192.168.0.234_01_202510141514352.jpg' # 测试图像路径
MODEL_PATH = '/home/hx/开发/ailai_image_obb/ailai_pc/best12.pt'
IMG_PATH = '1.jpg'
OUTPUT_PATH = '/home/hx/开发/ailai_image_obb/ailai_pc/output_pt.jpg' # 可视化结果保存路径
CONF_THRESH = 0.5 # 置信度阈值
CLASS_NAMES = ['bag'] # 你的类别名列表(按训练时顺序)
# 是否显示窗口(适合有 GUI 的 PC
SHOW_IMAGE = True
OUTPUT_PATH = 'output_pt.jpg'
CONF_THRESH = 0.5
IOU_THRESH = 0.45
CLASS_NAMES = ['bag']
# ======================
# 主函数
# 主函数(优化版)
# ======================
def main():
# 检查 CUDA
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"✅ 使用设备: {device}")
# 加载模型
print("➡️ 加载 YOLO 模型...")
model = YOLO(MODEL_PATH) # 自动加载架构和权重
model = YOLO(MODEL_PATH)
model.to(device)
# 推理
# 推理:获取原始结果(不立即解析)
print("➡️ 开始推理...")
results = model(IMG_PATH, imgsz=640, conf=CONF_THRESH, device=device)
results = model(IMG_PATH, imgsz=640, conf=CONF_THRESH, device=device, verbose=True)
# 获取第一张图的结果
r = results[0]
# 获取原始图像BGR
# 🚀 关键:使用原始 tensor 在 GPU 上处理
# pred: [x1, y1, x2, y2, conf, cls] 形状为 [num_boxes, 6]
pred = r.boxes.data # 已经在 GPU 上,类型: torch.Tensor
# 🔍 在 GPU 上做 NMS这才是正确姿势
# 注意non_max_suppression 输入是 [batch, num_boxes, 6]
det = non_max_suppression(
pred.unsqueeze(0), # 增加 batch 维度
conf_thres=CONF_THRESH,
iou_thres=IOU_THRESH,
classes=None,
agnostic=False,
max_det=100
)[0] # 取第一个也是唯一一个batch
# ✅ 此时所有后处理已完成,现在才从 GPU 拷贝到 CPU
if det is not None and len(det):
det = det.cpu().numpy() # ← 只拷贝一次!
else:
det = []
# 读取图像
img = cv2.imread(IMG_PATH)
if img is None:
raise FileNotFoundError(f"无法读取图像: {IMG_PATH}")
print("\n📋 检测结果:")
for box in r.boxes:
# 获取数据
xyxy = box.xyxy[0].cpu().numpy() # [x1, y1, x2, y2]
conf = box.conf.cpu().numpy()[0] # 置信度
cls_id = int(box.cls.cpu().numpy()[0]) # 类别 ID
cls_name = CLASS_NAMES[cls_id] # 类别名
for *xyxy, conf, cls_id in det:
x1, y1, x2, y2 = map(int, xyxy)
cls_name = CLASS_NAMES[int(cls_id)]
print(f" 类别: {cls_name}, 置信度: {conf:.3f}, 框: [{x1}, {y1}, {x2}, {y2}]")
# 画框
# 画框和标签
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
# 画标签
label = f"{cls_name} {conf:.2f}"
cv2.putText(img, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
@ -62,11 +72,5 @@ def main():
cv2.imwrite(OUTPUT_PATH, img)
print(f"\n🖼️ 可视化结果已保存: {OUTPUT_PATH}")
# 显示(可选)
if SHOW_IMAGE:
cv2.imshow("YOLOv8 Detection", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
if __name__ == '__main__':
main()

135
ailai_pc/detet_pc_f.py Normal file
View File

@ -0,0 +1,135 @@
from ultralytics import YOLO
from ultralytics.utils.ops import non_max_suppression
import torch
import cv2
import os
import time
from pathlib import Path
# ======================
# 配置参数
# ======================
MODEL_PATH = 'detect.pt' # 你的模型路径
INPUT_FOLDER = '/home/hx/开发/ailai_image_obb/ailai_pc/train' # 输入图片文件夹
OUTPUT_FOLDER = '/home/hx/开发/ailai_image_obb/ailai_pc/results' # 输出结果文件夹(自动创建)
CONF_THRESH = 0.5
IOU_THRESH = 0.45
CLASS_NAMES = ['bag']
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
IMG_SIZE = 640
SHOW_IMAGE = False # 是否逐张显示图像(适合调试)
# 支持的图像格式
IMG_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}
# ======================
# 获取文件夹中所有图片路径
# ======================
def get_image_paths(folder):
    """Collect every image file directly inside *folder*, sorted by name.

    Raises FileNotFoundError when the folder does not exist; prints a
    warning (but still returns the empty list) when no image is found.
    """
    root = Path(folder)
    if not root.exists():
        raise FileNotFoundError(f"输入文件夹不存在: {root}")
    found = sorted(p for p in root.iterdir() if p.suffix.lower() in IMG_EXTENSIONS)
    if not found:
        print(f"⚠️ 在 {root} 中未找到图片")
    return found
# ======================
# 主函数(批量推理)
# ======================
def main():
    """Run batch YOLO inference over INPUT_FOLDER and save annotated results.

    For each image: predicts with ultralytics, re-runs NMS on the raw GPU
    tensor, copies detections to the CPU once, draws boxes and labels, and
    writes the result to OUTPUT_FOLDER.  Prints per-image and overall timing.
    """
    print(f"✅ 使用设备: {DEVICE}")
    # Make sure the destination folder exists before any write.
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    print(f"📁 输出结果将保存到: {OUTPUT_FOLDER}")
    # Load the model once, outside the per-image loop.
    print("➡️ 加载 YOLO 模型...")
    model = YOLO(MODEL_PATH)
    model.to(DEVICE)
    # Collect the images to process; bail out when the folder has none.
    img_paths = get_image_paths(INPUT_FOLDER)
    if not img_paths:
        return
    print(f"📸 共找到 {len(img_paths)} 张图片,开始批量推理...\n")
    total_start_time = time.time()
    for idx, img_path in enumerate(img_paths, 1):
        print(f"{'=' * 50}")
        print(f"🖼️ 处理第 {idx}/{len(img_paths)} 张: {img_path.name}")
        # Manual wall-clock timing around the predictor call.
        start_time = time.time()
        # verbose=True also prints ultralytics' internal timing breakdown.
        results = model(str(img_path), imgsz=IMG_SIZE, conf=CONF_THRESH, device=DEVICE, verbose=True)
        inference_time = time.time() - start_time
        # First (and only) image of this call.
        r = results[0]
        pred = r.boxes.data  # raw [x1, y1, x2, y2, conf, cls] tensor, still on device
        # Second NMS pass on the raw tensor.  NOTE(review): model() above
        # already applies NMS internally, so this pass looks redundant —
        # confirm whether it is intentional.
        det = non_max_suppression(
            pred.unsqueeze(0),  # add the batch dimension NMS expects
            conf_thres=CONF_THRESH,
            iou_thres=IOU_THRESH,
            classes=None,
            agnostic=False,
            max_det=100
        )[0]
        # Copy to the CPU exactly once, after all tensor-side work is done.
        if det is not None and len(det):
            det = det.cpu().numpy()
        else:
            det = []
        # Read the original image for drawing.
        img = cv2.imread(str(img_path))
        if img is None:
            print(f"❌ 无法读取图像: {img_path}")
            continue
        print(f"\n📋 检测结果:")
        for *xyxy, conf, cls_id in det:
            x1, y1, x2, y2 = map(int, xyxy)
            cls_name = CLASS_NAMES[int(cls_id)]
            print(f" 类别: {cls_name}, 置信度: {conf:.3f}, 框: [{x1}, {y1}, {x2}, {y2}]")
            # Box plus "<class> <conf>" caption.
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            label = f"{cls_name} {conf:.2f}"
            cv2.putText(img, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        # Persist the annotated image.
        output_path = os.path.join(OUTPUT_FOLDER, f"result_{img_path.name}")
        cv2.imwrite(output_path, img)
        print(f"\n✅ 结果已保存: {output_path}")
        # Optional on-screen preview; pressing 'q' aborts the whole batch.
        if SHOW_IMAGE:
            cv2.imshow("Detection", img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        # Per-image elapsed time (inference + post-processing + drawing).
        total_infer_time = time.time() - start_time
        print(f"⏱️ 总处理时间: {total_infer_time * 1000:.1f}ms (推理+后处理)")
    # Overall statistics across the batch.
    total_elapsed = time.time() - total_start_time
    print(f"\n🎉 批量推理完成!共处理 {len(img_paths)} 张图片,总耗时: {total_elapsed:.2f} 秒")
    print(
        f"🚀 平均每张: {total_elapsed / len(img_paths) * 1000:.1f} ms ({1 / (total_elapsed / len(img_paths)):.1f} FPS)")
    if SHOW_IMAGE:
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()

122
ailai_pc/error_test.py Normal file
View File

@ -0,0 +1,122 @@
import cv2
import numpy as np
import os
from ultralytics import YOLO
# ====================== 用户配置 ======================
MODEL_PATH = '11.pt'
IMAGE_SOURCE_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/point2/train' # 验证集图片目录
LABEL_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/point2/train' # 标签目录(与图片同名 .txt
OUTPUT_DIR = './output_images'
IMG_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.tif', '.webp'}
os.makedirs(OUTPUT_DIR, exist_ok=True)
# ====================== 可视化函数 ======================
def draw_keypoints_on_image(image, kpts_xy, colors, label_prefix=''):
    """Draw numbered, colour-coded dots for each (x, y) keypoint on *image*.

    Colours cycle through *colors*; labels are 1-based and prefixed with
    *label_prefix*.  The image is drawn on in place and also returned.
    """
    for idx, (px, py) in enumerate(kpts_xy):
        px, py = int(px), int(py)
        colour = colors[idx % len(colors)]
        cv2.circle(image, (px, py), 8, colour, -1)
        cv2.putText(image, f'{label_prefix}{idx + 1}', (px + 10, py - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, colour, 2)
    return image
# ====================== 标签读取函数 ======================
def load_keypoints_from_label(label_path, img_shape):
    """Load the ground-truth keypoints of the first object in a YOLO-pose label.

    Expected label line format (normalised coordinates):
        <class> xc yc w h x1 y1 v1 x2 y2 v2 x3 y3 v3 x4 y4 v4
    i.e. 17 fields (1 class + 4 bbox + 4 * (x, y, visibility)).
    Only the first line of the file is read, so multi-object labels
    contribute their first object only.

    Args:
        label_path: path to the .txt label file.
        img_shape: image shape as (H, W, ...) used to de-normalise coords.

    Returns:
        np.ndarray of shape (K, 2) with pixel coordinates, or None when the
        file is missing or the line is too short (a warning is printed
        instead of raising).
    """
    if not os.path.exists(label_path):
        print(f"⚠️ 找不到标签文件: {label_path}")
        return None
    H, W = img_shape[:2]
    with open(label_path, 'r') as f:
        line = f.readline().strip().split()
    if len(line) < 17:
        print(f"⚠️ 标签长度不足: {label_path} ({len(line)}项)")
        return None
    floats = [float(x) for x in line[5:]]  # skip the 5 class + bbox fields
    # Drop any trailing tokens that do not form a complete (x, y, v) triplet,
    # so a stray extra field cannot crash the reshape below with ValueError.
    usable = len(floats) - len(floats) % 3
    coords = np.array(floats[:usable]).reshape(-1, 3)[:, :2]
    # De-normalise from [0, 1] to pixel coordinates.
    coords[:, 0] *= W
    coords[:, 1] *= H
    return coords
# ====================== 主程序 ======================
if __name__ == "__main__":
    # Compute the mean pixel error between predicted and ground-truth
    # keypoints over a labelled folder, saving a side-by-side visualisation
    # (green = GT, red = prediction) for every processed sample.
    print("🚀 开始验证集关键点误差计算")
    model = YOLO(MODEL_PATH)
    print(f"✅ 模型加载完成: {MODEL_PATH}")
    image_files = [
        f for f in os.listdir(IMAGE_SOURCE_DIR)
        if os.path.splitext(f.lower())[1] in IMG_EXTENSIONS
    ]
    if not image_files:
        print("❌ 未找到图像文件")
        exit(1)
    total_errors = []
    skipped = 0
    # GT points in shades of green, predicted points in red.
    colors_gt = [(0, 255, 0), (0, 200, 0), (0, 150, 0), (0, 100, 0)]
    colors_pred = [(0, 0, 255)] * 4
    for img_filename in image_files:
        img_path = os.path.join(IMAGE_SOURCE_DIR, img_filename)
        # Label file shares the image's base name, with a .txt extension.
        label_path = os.path.join(LABEL_DIR, os.path.splitext(img_filename)[0] + '.txt')
        img = cv2.imread(img_path)
        if img is None:
            print(f"❌ 无法读取图像: {img_path}")
            skipped += 1
            continue
        gt_kpts = load_keypoints_from_label(label_path, img.shape)
        if gt_kpts is None or len(gt_kpts) < 4:
            print(f"⚠️ 跳过 {img_filename}:标签点不足")
            skipped += 1
            continue
        results = model(img, verbose=False)
        if not results or results[0].keypoints is None or len(results[0].keypoints) == 0:
            print(f"⚠️ {img_filename}: 无检测结果,跳过")
            skipped += 1
            continue
        # First detected instance only.
        pred_kpts = results[0].keypoints.xy[0].cpu().numpy()
        if pred_kpts.shape[0] != gt_kpts.shape[0]:
            print(f"⚠️ {img_filename}: 点数不匹配 GT={len(gt_kpts)}, Pred={len(pred_kpts)},跳过")
            skipped += 1
            continue
        # Per-point Euclidean error in pixels.  NOTE(review): this assumes
        # the prediction's keypoint ordering matches the label's — confirm.
        errors = np.linalg.norm(pred_kpts - gt_kpts, axis=1)
        mean_error = np.mean(errors)
        total_errors.append(mean_error)
        print(f"📸 {img_filename}: 每点误差={np.round(errors, 2)} 像素, 平均误差={mean_error:.2f}px")
        # Visual comparison image: GT then predictions on the same frame.
        vis_img = img.copy()
        vis_img = draw_keypoints_on_image(vis_img, gt_kpts, colors_gt, label_prefix='GT')
        vis_img = draw_keypoints_on_image(vis_img, pred_kpts, colors_pred, label_prefix='P')
        save_path = os.path.join(OUTPUT_DIR, f"compare_{img_filename}")
        cv2.imwrite(save_path, vis_img)
    # ====================== Summary statistics ======================
    print("\n======================")
    if total_errors:
        print(f"🎯 有效样本数: {len(total_errors)}")
        print(f"🚫 跳过样本数: {skipped}")
        print(f"📈 平均关键点误差: {np.mean(total_errors):.2f} 像素")
    else:
        print(f"⚠️ 所有样本均被跳过(跳过 {skipped} 张)")
    print("======================")

BIN
ailai_pc/jianzhi/best.pt Normal file

Binary file not shown.

BIN
ailai_pc/jianzhi/jz.pt Normal file

Binary file not shown.

95
ailai_pc/jianzhi/jz.py Normal file
View File

@ -0,0 +1,95 @@
import torch
import torch.nn as nn
from ultralytics import YOLO
# ------------------- 核心剪枝函数 -------------------
def prune_conv_bn(conv_bn, keep_idx):
    """Shrink a ConvBNAct module's Conv (and BN) to the output channels in *keep_idx*.

    Grouped (e.g. depthwise) convolutions are returned untouched.  The
    module is mutated in place and returned for convenience.

    NOTE(review): only the *output* channels are pruned here; the following
    layer's input channels are not adjusted by this function.
    """
    conv, bn = conv_bn.conv, conv_bn.bn
    # Row-pruning a grouped conv would break its group structure — skip it.
    if conv.groups != 1:
        return conv_bn
    kept = len(keep_idx)
    # Rebuild the conv with fewer output channels, same everything else.
    pruned_conv = nn.Conv2d(
        in_channels=conv.in_channels,
        out_channels=kept,
        kernel_size=conv.kernel_size,
        stride=conv.stride,
        padding=conv.padding,
        dilation=conv.dilation,
        groups=conv.groups,
        bias=conv.bias is not None,
    ).to(conv.weight.device)
    pruned_conv.weight.data = conv.weight.data[keep_idx].clone()
    if conv.bias is not None:
        pruned_conv.bias.data = conv.bias.data[keep_idx].clone()
    # Rebuild the BN with the matching subset of statistics, if present.
    pruned_bn = None
    if bn is not None:
        pruned_bn = nn.BatchNorm2d(kept).to(bn.weight.device)
        pruned_bn.weight.data = bn.weight.data[keep_idx].clone()
        pruned_bn.bias.data = bn.bias.data[keep_idx].clone()
        pruned_bn.running_mean = bn.running_mean[keep_idx].clone()
        pruned_bn.running_var = bn.running_var[keep_idx].clone()
    # Swap the new layers into the module.
    conv_bn.conv = pruned_conv
    conv_bn.bn = pruned_bn
    return conv_bn
def get_prune_idx(conv_bn, prune_ratio=0.3):
    """Pick the output channels to keep, ranked by BN gamma (or conv L2 norm)."""
    conv, bn = conv_bn.conv, conv_bn.bn
    # Channel importance: |gamma| when BN exists, else each filter's L2 norm.
    if bn is None:
        importance = conv.weight.data.view(conv.out_channels, -1).norm(p=2, dim=1)
    else:
        importance = bn.weight.data.abs()
    # Always keep at least one channel.
    keep_num = max(int(conv.out_channels * (1 - prune_ratio)), 1)
    return torch.topk(importance, keep_num)[1]
def prune_yolov11_model(model, prune_ratio=0.3):
    """Walk the model and prune every ConvBNAct module in place.

    Returns the same model object for chaining.
    """
    # Snapshot the target modules first so mutation cannot disturb the walk.
    targets = [m for _, m in model.named_modules()
               if type(m).__name__ == "ConvBNAct"]
    for module in targets:
        prune_conv_bn(module, get_prune_idx(module, prune_ratio))
    return model
# ------------------- 主流程 -------------------
def main(model_path="best.pt", save_path="yolov11_pruned_ts.pt",
         prune_ratio=0.3, device="cuda"):
    """Prune a YOLO checkpoint and export it as optimized TorchScript.

    Args:
        model_path: path to the .pt checkpoint to load.
        save_path: where to write the traced TorchScript model.
        prune_ratio: fraction of output channels to remove per ConvBNAct.
        device: device used for pruning and tracing.
    """
    # Unwrap the underlying nn.Module from the ultralytics wrapper.
    model = YOLO(model_path).model
    model.eval().to(device)
    # Prune every ConvBNAct.  NOTE(review): downstream layers' input
    # channels are not adjusted by prune_conv_bn, so tracing below may
    # fail on shape mismatches — verify on a real checkpoint.
    print(f"✅ 开始剪枝,比例: {prune_ratio}")
    model = prune_yolov11_model(model, prune_ratio)
    print("✅ 剪枝完成")
    # Dummy input for tracing (fixed 1x3x640x640).
    example_inputs = torch.randn(1, 3, 640, 640).to(device)
    # TorchScript trace + inference-time graph optimisation.
    print("🔹 开始 TorchScript 跟踪...")
    traced_model = torch.jit.trace(model, example_inputs)
    traced_model = torch.jit.optimize_for_inference(traced_model)
    # Persist the traced model.
    traced_model.save(save_path)
    print(f"✅ TorchScript 剪枝模型已保存: {save_path}")


if __name__ == "__main__":
    main(
        model_path="best.pt",
        save_path="yolov11_pruned_ts.pt",
        prune_ratio=0.3
    )

Binary file not shown.

BIN
ailai_pc/last.pt Normal file

Binary file not shown.

8
ailai_pc/lingshi.py Normal file
View File

@ -0,0 +1,8 @@
from ultralytics import YOLO
import torch
# Quick inspection script: print the weight range (min/max) of every Conv2d
# layer in the checkpoint — useful for eyeballing quantisation ranges.
model = YOLO("/home/hx/开发/ailai_image_obb/ailai_pc/detect.pt")
for name, module in model.model.named_modules():
    if isinstance(module, torch.nn.Conv2d):
        w = module.weight
        print(f"{name} -> min: {w.min().item():.3f}, max: {w.max().item():.3f}")

Binary file not shown.

Before

Width:  |  Height:  |  Size: 728 KiB

View File

@ -5,7 +5,7 @@ import os
# ====================== 用户配置 ======================
MODEL_PATH = 'best.pt'
IMAGE_SOURCE_DIR = './train' # 👈 修改为你的图像文件夹路径
IMAGE_SOURCE_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/point2/val' # 👈 修改为你的图像文件夹路径
OUTPUT_DIR = './output_images' # 保存结果的文件夹
# 支持的图像扩展名

View File

@ -2,10 +2,10 @@ import os
import shutil
# ================= 用户配置 =================
FOLDER_PATH = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/point2' # 图片和 txt 所在文件夹
FOLDER_PATH = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/20251020' # 图片和 txt 所在文件夹
IMG_EXT = '.jpg'
TXT_EXT = '.txt'
START_NUM = 1 # 从 1 开始编号
START_NUM = 571 # 从 1 开始编号
# ================= 获取文件列表 =================
files = os.listdir(FOLDER_PATH)

View File

@ -0,0 +1,138 @@
import os
import cv2
import numpy as np
from rknnlite.api import RKNNLite
# ====================== 配置 ======================
MODEL_PATH = "yolo11.rknn" # RKNN 模型路径
IMG_PATH = "11.jpg" # 待检测图片
IMG_SIZE = (640, 640) # 模型输入尺寸 (w,h)
OBJ_THRESH = 0.001 # 目标置信度阈值
NMS_THRESH = 0.45 # NMS 阈值
CLASS_NAME = ["bag"] # 单类别
OUTPUT_DIR = "./result"
os.makedirs(OUTPUT_DIR, exist_ok=True)
# ====================== 工具函数 ======================
def letterbox_resize(image, size, bg_color=114):
    """Resize *image* to fit inside *size* = (w, h), padding with *bg_color*.

    Aspect ratio is preserved and the scaled image is centred on the canvas.
    Returns (canvas, scale, dx, dy) where dx/dy are the left/top offsets.
    """
    target_w, target_h = size
    src_h, src_w = image.shape[:2]
    ratio = min(target_w / src_w, target_h / src_h)
    scaled_w = int(src_w * ratio)
    scaled_h = int(src_h * ratio)
    scaled = cv2.resize(image, (scaled_w, scaled_h))
    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
    off_x = (target_w - scaled_w) // 2
    off_y = (target_h - scaled_h) // 2
    canvas[off_y:off_y + scaled_h, off_x:off_x + scaled_w] = scaled
    return canvas, ratio, off_x, off_y
def dfl_numpy(position):
    """Decode DFL (Distribution Focal Loss) regression logits, NumPy only.

    *position* has shape (n, 4*mc, h, w): four distance distributions of
    *mc* bins each.  A softmax over the bin axis turns each distribution
    into probabilities, and the expected bin index is returned as an array
    of shape (n, 4, h, w).
    """
    n, c, h, w = position.shape
    p_num = 4
    mc = c // p_num
    y = position.reshape(n, p_num, mc, h, w)
    # Numerically stable softmax over the bin axis: subtracting the max
    # prevents np.exp from overflowing on large logits (the unshifted form
    # produces inf/NaN once logits exceed ~700).
    y = y - np.max(y, axis=2, keepdims=True)
    y = np.exp(y)
    y /= np.sum(y, axis=2, keepdims=True)
    # Expectation over bin indices 0..mc-1.
    acc = np.arange(mc).reshape(1, 1, mc, 1, 1)
    return np.sum(y * acc, axis=2)
def box_process(position):
    """Decode one detection head's raw box tensor into absolute xyxy boxes.

    *position*: (n, 4*mc, grid_h, grid_w) DFL logits.  The DFL expectation
    gives per-cell distances from the cell centre; these are converted to
    corner coordinates and scaled by the head's stride back into
    input-image pixels.  Returns (n, 4, grid_h, grid_w) xyxy boxes.
    """
    grid_h, grid_w = position.shape[2:4]
    # Per-cell (col, row) index grids.
    col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
    col = col.reshape(1, 1, grid_h, grid_w)
    row = row.reshape(1, 1, grid_h, grid_w)
    grid = np.concatenate((col, row), axis=1)
    # Stride of this head relative to the model input size (e.g. 8/16/32).
    stride = np.array([IMG_SIZE[1] // grid_h, IMG_SIZE[0] // grid_w]).reshape(1, 2, 1, 1)
    position = dfl_numpy(position)
    # Cell centre (grid + 0.5) minus/plus the decoded distances.
    box_xy = grid + 0.5 - position[:, 0:2, :, :]
    box_xy2 = grid + 0.5 + position[:, 2:4, :, :]
    xyxy = np.concatenate((box_xy * stride, box_xy2 * stride), axis=1)
    return xyxy
def filter_boxes(boxes, box_confidences, box_class_probs):
    """Keep the boxes whose (class score x objectness) reaches OBJ_THRESH.

    Returns (boxes, classes, scores) filtered down to the surviving rows.
    NOTE(review): with a single class the softmax below always yields
    probability 1.0, so filtering effectively uses objectness alone —
    confirm the exported model really emits separate conf/class tensors.
    """
    # Sigmoid on objectness, clipped so np.exp cannot overflow.
    box_confidences = 1 / (1 + np.exp(-np.clip(box_confidences, -50, 50)))
    # Numerically stable softmax over the class axis (max-shifted).
    shifted = box_class_probs - np.max(box_class_probs, axis=-1, keepdims=True)
    box_class_probs = np.exp(shifted)
    box_class_probs /= np.sum(box_class_probs, axis=-1, keepdims=True)
    box_confidences = box_confidences.reshape(-1)
    class_max_score = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)
    # Combined score threshold.
    _pos = np.where(class_max_score * box_confidences >= OBJ_THRESH)
    boxes = boxes[_pos]
    classes = classes[_pos]
    scores = (class_max_score * box_confidences)[_pos]
    return boxes, classes, scores
def post_process(outputs, scale, dx, dy):
    """Turn the 9 raw RKNN head tensors into at most one final detection.

    The outputs come in 3 branches of (box, class, score) tensors.  After
    decoding, flattening, and thresholding, only the single highest-scoring
    box is kept and mapped back to original-image coordinates.

    Returns (boxes, classes, scores) each of length 1, or (None, None, None)
    when nothing passes the threshold.
    """
    boxes, classes_conf, scores = [], [], []
    branch_num = 3
    for i in range(branch_num):
        boxes.append(box_process(outputs[i * 3]))
        classes_conf.append(outputs[i * 3 + 1])
        scores.append(outputs[i * 3 + 2])  # real class output of the head
    def sp_flatten(x):
        # (n, c, h, w) -> (n*h*w, c): one row per anchor cell.
        ch = x.shape[1]
        x = x.transpose(0, 2, 3, 1)
        return x.reshape(-1, ch)
    boxes = np.concatenate([sp_flatten(b) for b in boxes])
    classes_conf = np.concatenate([sp_flatten(c) for c in classes_conf])
    scores = np.concatenate([sp_flatten(s) for s in scores])
    # NOTE(review): `scores` is passed as the objectness argument and
    # `classes_conf` as the class-probability argument — verify this
    # matches the tensor order the exported .rknn actually produces.
    boxes, classes, scores = filter_boxes(boxes, scores, classes_conf)
    if boxes.shape[0] == 0:
        return None, None, None
    # Keep only the single most confident detection.
    max_idx = np.argmax(scores)
    boxes = boxes[max_idx:max_idx + 1]
    classes = classes[max_idx:max_idx + 1]
    scores = scores[max_idx:max_idx + 1]
    # Undo the letterbox: remove padding, then rescale to original pixels.
    boxes[:, [0, 2]] -= dx
    boxes[:, [1, 3]] -= dy
    boxes /= scale
    boxes = boxes.clip(min=0)
    return boxes, classes, scores
def draw(image, boxes, scores, classes):
    """Render each detection as a blue box with a red class/score caption."""
    for bbox, conf, cls_idx in zip(boxes, scores, classes):
        x1, y1, x2, y2 = (int(v) for v in bbox)
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(image, f"{CLASS_NAME[cls_idx]} {conf:.3f}", (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
# ====================== Main flow ======================
# Load one image, run the RKNN model once, post-process, and save the result.
img = cv2.imread(IMG_PATH)
if img is None:
    raise ValueError(f"Image {IMG_PATH} not found!")
img_resized, scale, dx, dy = letterbox_resize(img, IMG_SIZE)
input_data = np.expand_dims(img_resized, 0)  # add batch dim: NHWC uint8
# NOTE(review): the frame stays BGR here, while detect_fixed.py flips to
# RGB before inference — confirm which channel order this .rknn expects.
rknn = RKNNLite(verbose=False)
rknn.load_rknn(MODEL_PATH)
rknn.init_runtime()
outputs = rknn.inference([input_data])
rknn.release()
# Dump raw output stats for debugging the exported model.
print("Outputs len:", len(outputs))
for i, out in enumerate(outputs):
    print(f"outputs[{i}].shape = {out.shape}, min={out.min()}, max={out.max()}, mean={out.mean():.4f}")
boxes, classes, scores = post_process(outputs, scale, dx, dy)
if boxes is None:
    print("Detected 0 boxes")
else:
    draw(img, boxes, scores, classes)
    result_path = os.path.join(OUTPUT_DIR, os.path.basename(IMG_PATH))
    cv2.imwrite(result_path, img)
    print(f"Detection result saved to {result_path}")

View File

@ -51,7 +51,7 @@ result = calculate_offset_from_image("your_image_path.jpg", visualize=True)
示例 1: 仅获取偏移量(不画图)
```bash
from caculate_diff2.0 import calculate_offset_from_image
from calculate_diff2.0 import calculate_offset_from_image
result = calculate_offset_from_image("11.jpg", visualize=False)
if result['success']:
@ -63,7 +63,7 @@ else:
示例 2: 获取偏移量并保存可视化图
```bash
from caculate_diff2.0 import calculate_offset_from_image
from calculate_diff2.0 import calculate_offset_from_image
result = calculate_offset_from_image("11.jpg", visualize=True)
@ -76,7 +76,6 @@ result = calculate_offset_from_image("11.jpg", visualize=True)
dy_mm: 垂直偏移(毫米)
cx: 中心点 x 坐标(像素)
cy: 中心点 y 坐标(像素)
message: 错误信息或成功提示
##该函数返回一个包含下列字段的字典2.0
@ -93,6 +92,3 @@ result = calculate_offset_from_image("11.jpg", visualize=True)
message: 错误信息或成功提示

View File

@ -0,0 +1,256 @@
# detect_fixed.py
import cv2
import numpy as np
import os
import platform
from rknnlite.api import RKNNLite
# ====================== 配置区 ======================
IMAGE_PATH = "11.jpg" # 测试图片
MODEL_PATH = "point.rknn"
OUTPUT_DIR = "./output_rknn"
os.makedirs(OUTPUT_DIR, exist_ok=True)
# 固定参考点 & 缩放因子
FIXED_REF_POINT = (535, 605)
width_mm, width_px = 70.0, 42
height_mm, height_px = 890.0, 507
SCALE_X = width_mm / float(width_px)
SCALE_Y = height_mm / float(height_px)
print(f"[INFO] Scale factors: X={SCALE_X:.3f} mm/px, Y={SCALE_Y:.3f} mm/px")
IMG_SIZE = (640, 640)
# 设备树路径(用于自动识别平台)
DEVICE_COMPATIBLE_NODE = '/proc/device-tree/compatible'
def get_host():
    """Identify the Rockchip platform from the device tree, or return 'OS-arch'.

    On aarch64 Linux the /proc/device-tree/compatible node selects the NPU
    target; a read failure terminates the process (matching the upstream
    rknn examples).  Elsewhere, a generic "<system>-<machine>" string is
    returned.
    """
    os_name = platform.system()
    arch = platform.machine()
    if not (os_name == 'Linux' and arch == 'aarch64'):
        return f"{os_name}-{arch}"
    try:
        with open(DEVICE_COMPATIBLE_NODE, 'r') as node:
            compatible = node.read()
    except Exception as e:
        print(f"Read device tree failed: {e}")
        exit(-1)
    # Match known SoCs; anything else on aarch64 is treated as RK3566/RK3568.
    for tag, host in (('rk3588', 'RK3588'),
                      ('rk3576', 'RK3576'),
                      ('rk3562', 'RK3562')):
        if tag in compatible:
            return host
    return 'RK3566_RK3568'
def letterbox_resize(image, size, bg_color=114):
    """Aspect-preserving resize of *image* onto a (size[0] x size[1]) canvas.

    The image is scaled to fit, centred, and the border filled with
    *bg_color*.  Returns (canvas, scale, dx, dy); dx/dy are the left/top
    padding offsets needed later to map detections back.
    """
    out_w, out_h = size
    in_h, in_w = image.shape[:2]
    factor = min(out_w / in_w, out_h / in_h)
    fit_w, fit_h = int(in_w * factor), int(in_h * factor)
    fitted = cv2.resize(image, (fit_w, fit_h))
    board = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)
    pad_x = (out_w - fit_w) // 2
    pad_y = (out_h - fit_h) // 2
    board[pad_y:pad_y + fit_h, pad_x:pad_x + fit_w] = fitted
    return board, factor, pad_x, pad_y
def safe_sigmoid(x):
    """Logistic sigmoid with inputs clipped to [-50, 50] to avoid exp overflow."""
    clipped = np.clip(x, -50, 50)
    return 1.0 / (1.0 + np.exp(-clipped))
def softmax(x):
    """Max-shifted softmax of a logit vector (numerically stable)."""
    exps = np.exp(x - np.max(x))
    return exps / exps.sum()
def dfl_to_xywh(loc, grid_x, grid_y, stride):
    """Decode one cell's 64 DFL logits into an (x, y, w, h) box, top-left origin.

    *loc* holds 4 blocks of 16 bin logits (left/top/right/bottom distances).
    Each distance is the softmax-weighted expectation of its bin indices;
    the distances are anchored at the cell centre (grid_x/y + 0.5), scaled
    by *stride* into input-image pixels, and converted from corner form to
    top-left + size form.
    """
    xywh_ = np.zeros(4)
    xywh = np.zeros(4)
    # Expected value of each 16-bin distance distribution.
    for i in range(4):
        l = loc[i * 16:(i + 1) * 16]
        l = softmax(l)
        xywh_[i] = sum(j * l[j] for j in range(16))
    # Corners in grid units: centre minus left/top, centre plus right/bottom.
    xywh_[0] = (grid_x + 0.5) - xywh_[0]
    xywh_[1] = (grid_y + 0.5) - xywh_[1]
    xywh_[2] = (grid_x + 0.5) + xywh_[2]
    xywh_[3] = (grid_y + 0.5) + xywh_[3]
    # Centre and size scaled into pixels.
    xywh[0] = ((xywh_[0] + xywh_[2]) / 2) * stride
    xywh[1] = ((xywh_[1] + xywh_[3]) / 2) * stride
    xywh[2] = (xywh_[2] - xywh_[0]) * stride
    xywh[3] = (xywh_[3] - xywh_[1]) * stride
    # Shift the centre to the top-left corner.
    xywh[0] -= xywh[2] / 2
    xywh[1] -= xywh[3] / 2
    return xywh
def parse_pose_outputs(outputs, conf_threshold=0.5, dx=0, dy=0, scale=1.0):
    """Decode raw RKNN pose-model outputs into keypoints plus the best box.

    outputs[0..2] are the three detection heads, each (1, 64+num_classes,
    H, W): 64 DFL box logits followed by per-class logits.  outputs[3] is
    indexed as a keypoint head of shape (1, num_kpts, 3, anchors) —
    TODO confirm this layout against the exported model.

    dx/dy/scale are the letterbox offsets and scale used to map results
    back to original-image coordinates.

    Returns (keypoints (K, 3) rows of [x, y, visibility-prob], class_id,
    confidence, [x, y, w, h] box); class_id is -1 and the box is zeros
    when no cell clears *conf_threshold*.
    """
    boxes = []
    obj_confs = []
    class_ids = []
    # Dense scan over every cell of every detection head.  NOTE(review):
    # pure-Python triple loop — slow on large grids; vectorise if this
    # ever has to run per frame.
    for idx in range(3):  # det head
        det = np.array(outputs[idx])[0]
        C, H, W = det.shape
        num_classes = C - 64
        stride = 640 // H
        for h in range(H):
            for w in range(W):
                for c in range(num_classes):
                    conf = safe_sigmoid(det[64 + c, h, w])
                    if conf >= conf_threshold:
                        loc = det[:64, h, w].astype(np.float32)
                        xywh = dfl_to_xywh(loc, w, h, stride)
                        boxes.append(xywh)
                        obj_confs.append(conf)
                        class_ids.append(c)
    # Keep the single most confident candidate (or a zero box if none).
    if not obj_confs:
        best_box = np.array([0, 0, 0, 0])
        class_id = -1
        obj_conf = 0.0
    else:
        max_idx = np.argmax(obj_confs)
        best_box = boxes[max_idx]
        class_id = class_ids[max_idx]
        obj_conf = obj_confs[max_idx]
    # Undo the letterbox transform for the box.
    x, y, w, h = best_box
    x = (x - dx) / scale
    y = (y - dy) / scale
    w /= scale
    h /= scale
    best_box = [x, y, w, h]
    # Keypoints: pick the anchor column with the highest mean visibility.
    kpt_output = np.array(outputs[3])[0]
    confs = kpt_output[:, 2, :]
    best_anchor_idx = np.argmax(np.mean(confs, axis=0))
    kpt_data = kpt_output[:, :, best_anchor_idx]
    keypoints = []
    for i in range(kpt_data.shape[0]):
        x_img, y_img, vis_raw = kpt_data[i]
        vis_prob = safe_sigmoid(vis_raw)
        # Coordinates appear to be in letterboxed pixels already (no stride
        # multiplication is applied here) — TODO confirm.
        x_orig = (x_img - dx) / scale
        y_orig = (y_img - dy) / scale
        keypoints.append([x_orig, y_orig, vis_prob])
    return np.array(keypoints), class_id, obj_conf, best_box
def compute_offset(keypoints, fixed_point, scale_x, scale_y):
    """Midpoint of the first two keypoints and its mm offset from *fixed_point*.

    Returns (cx, cy, dx_mm, dy_mm), or None when fewer than two keypoints
    are available.
    """
    if len(keypoints) < 2:
        return None
    first, second = keypoints[0], keypoints[1]
    cx = (first[0] + second[0]) / 2
    cy = (first[1] + second[1]) / 2
    dx_mm = (cx - fixed_point[0]) * scale_x
    dy_mm = (cy - fixed_point[1]) * scale_y
    return cx, cy, dx_mm, dy_mm
def visualize_result(image, keypoints, bbox, fixed_point, offset_info, save_path):
    """Draw keypoints, the bbox, centre/reference markers and the offset
    arrow on a copy of *image*, then write it to *save_path*."""
    canvas = image.copy()
    ref_x, ref_y = map(int, fixed_point)
    cx, cy, dx_mm, dy_mm = offset_info
    # First two keypoints: red (index 0) and yellow (index 1), confident only.
    for i, (kx, ky, kconf) in enumerate(keypoints[:2]):
        if kconf > 0.5:
            dot_color = (0, 0, 255) if i == 0 else (0, 255, 255)
            cv2.circle(canvas, (int(kx), int(ky)), 6, dot_color, -1)
    # Green segment joining the two keypoints.
    if len(keypoints) >= 2:
        cv2.line(canvas, (int(keypoints[0][0]), int(keypoints[0][1])),
                 (int(keypoints[1][0]), int(keypoints[1][1])),
                 (0, 255, 0), 2)
    bx, by, bw, bh = bbox
    cv2.rectangle(canvas, (int(bx), int(by)), (int(bx + bw), int(by + bh)), (255, 0, 0), 2)
    # Midpoint, fixed reference, and the arrow between them.
    cv2.circle(canvas, (int(cx), int(cy)), 10, (0, 255, 0), 3)
    cv2.circle(canvas, (ref_x, ref_y), 12, (255, 255, 0), 3)
    cv2.arrowedLine(canvas, (ref_x, ref_y), (int(cx), int(cy)), (255, 255, 0), 2, tipLength=0.05)
    cv2.putText(canvas, f"DeltaX={dx_mm:+.1f}mm", (ref_x + 30, ref_y - 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
    cv2.putText(canvas, f"DeltaY={dy_mm:+.1f}mm", (ref_x + 30, ref_y + 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
    cv2.imwrite(save_path, canvas)
def main():
    """RKNN pose-detection demo: load model, infer one image, report offset."""
    host_name = get_host()
    print(f"[INFO] Running on: {host_name}")
    rknn = RKNNLite()
    # ---- Load Model ----
    ret = rknn.load_rknn(MODEL_PATH)
    if ret != 0:
        print("❌ Failed to load RKNN model!")
        exit(ret)
    print("✅ Model loaded successfully.")
    # ---- Init Runtime ----
    # RK3576/RK3588 expose multiple NPU cores; pin core 0 there.
    if host_name in ['RK3576', 'RK3588']:
        ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
    else:
        ret = rknn.init_runtime()
    if ret != 0:
        print("❌ Init runtime failed!")
        rknn.release()
        exit(ret)
    print("✅ Runtime initialized.")
    # ---- Preprocess ----
    # NOTE(review): returning here skips rknn.release() — consider moving
    # the release into a broader finally block.
    ori_img = cv2.imread(IMAGE_PATH)
    if ori_img is None:
        print(f"❌ Cannot read image: {IMAGE_PATH}")
        return
    img_resized, scale, dx, dy = letterbox_resize(ori_img, IMG_SIZE)
    # BGR -> RGB flip plus batch dim; uint8 NHWC input.
    input_tensor = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8)  # RGB
    # ---- Inference ----
    print("🔍 Starting inference...")
    outputs = rknn.inference(inputs=[input_tensor])
    print("✅ Inference completed.")
    # ---- Post-process ----
    try:
        keypoints, cls_id, obj_conf, bbox = parse_pose_outputs(
            outputs, dx=dx, dy=dy, scale=scale)
        offset_info = compute_offset(keypoints, FIXED_REF_POINT, SCALE_X, SCALE_Y)
        if offset_info is None:
            print("⚠️ Not enough keypoints detected.")
            return
        cx, cy, dx_mm, dy_mm = offset_info
        vis_save_path = os.path.join(OUTPUT_DIR, f"result_{os.path.basename(IMAGE_PATH)}")
        visualize_result(ori_img, keypoints, bbox, FIXED_REF_POINT, offset_info, vis_save_path)
        print(f"\n🎯 Detection Result:")
        print(f"Center: ({cx:.1f}, {cy:.1f})")
        print(f"Offset: ΔX={dx_mm:+.2f}mm, ΔY={dy_mm:+.2f}mm")
        print(f"Class: {cls_id}, Confidence: {obj_conf:.3f}")
        print(f"Saved result to: {vis_save_path}")
    except Exception as e:
        print(f"❌ Post-processing error: {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Release the NPU runtime whether post-processing succeeded or not.
        rknn.release()


if __name__ == "__main__":
    main()

View File

@ -4,6 +4,7 @@ import os
from rknnlite.api import RKNNLite
# ====================== 配置区 ======================
MODEL_PATH = "point.rknn"
OUTPUT_DIR = "./output_rknn"
os.makedirs(OUTPUT_DIR, exist_ok=True)

Binary file not shown.