完成夹具开合判断的代码，更新了rknn3588上完成测试，3568在生产下次去测试的时候再进行测试

2025-11-16 22:02:11 +08:00
parent 514ed6f1fd
commit 267aac1605
12 changed files with 973 additions and 0 deletions
--- a/cls_main/cls_rk3568.rknn
+++ b/cls_main/cls_rk3568.rknn
--- a/cls_main/cls_rk3588.rknn
+++ b/cls_main/cls_rk3588.rknn
--- a/cls_main/main_cls.py
+++ b/cls_main/main_cls.py
@ -0,0 +1,99 @@
 import cv2
 import numpy as np
 import platform
 from rknnlite.api import RKNNLite
 # ------------------- Global state -------------------
 # Shared RKNNLite runtime; populated by init_rknn_model() on first use.
 _global_rknn_instance = None
 # Class id -> display label (the strings read "clamp closed" / "clamp open").
 labels = {0: '夹具夹紧', 1: '夹具打开'}
 # ROI: x, y, w, h (pixels, applied to the raw input image by preprocess()).
 ROI = (818, 175, 1381, 1271)   # example value
 # Device-tree node read by get_host() to identify the SoC.
 DEVICE_COMPATIBLE_NODE = '/proc/device-tree/compatible'
 # ------------------- 主机信息 -------------------
 def get_host():
    """Identify the platform this process is running on.

    On Linux/aarch64 the device-tree ``compatible`` node is read and searched
    for known SoC names; content matching none of them falls back to
    ``'RK3566_RK3568'``. On any other platform the ``'<system>-<machine>'``
    string built from the ``platform`` module is returned.

    Returns:
        str: ``'RK3562'``, ``'RK3576'``, ``'RK3588'``, ``'RK3566_RK3568'`` or
        ``'<system>-<machine>'``.

    Raises:
        SystemExit: with code -1 when the device-tree node cannot be read.
    """
    os_machine = platform.system() + '-' + platform.machine()
    if os_machine != 'Linux-aarch64':
        return os_machine
    try:
        with open(DEVICE_COMPATIBLE_NODE) as f:
            device_compatible_str = f.read()
    except IOError:
        print('Read device node {} failed.'.format(DEVICE_COMPATIBLE_NODE))
        # Raise SystemExit directly: the bare ``exit`` helper is injected by
        # the ``site`` module and is not present in every interpreter setup.
        raise SystemExit(-1)
    # Checked in the same order as before so the first match still wins.
    for soc in ('rk3562', 'rk3576', 'rk3588'):
        if soc in device_compatible_str:
            return soc.upper()
    return 'RK3566_RK3568'
 # ------------------- RKNN 模型初始化（只加载一次） -------------------
 def init_rknn_model(model_path):
    """Load the RKNN model once and return the shared runtime instance.

    The first successful call creates an ``RKNNLite`` object, loads
    ``model_path``, initialises the runtime and caches the object in
    ``_global_rknn_instance``. Later calls return the cached object and
    ignore ``model_path``.

    Args:
        model_path: Path of the ``.rknn`` model file.

    Returns:
        The initialised ``RKNNLite`` instance.

    Raises:
        RuntimeError: if loading the model or initialising the runtime fails.
    """
    global _global_rknn_instance
    if _global_rknn_instance is not None:
        return _global_rknn_instance
    rknn_lite = RKNNLite(verbose=False)
    ret = rknn_lite.load_rknn(model_path)
    if ret != 0:
        raise RuntimeError(f'Load model failed: {ret}')
    # Pin NPU core 0 only on RK3576/RK3588; other hosts use the runtime
    # default. NOTE(review): assumes core_mask is only meaningful on
    # multi-core NPUs - confirm on RK3568 hardware.
    if get_host() in ('RK3576', 'RK3588'):
        ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
    else:
        ret = rknn_lite.init_runtime()
    if ret != 0:
        # Do not leave a half-initialised runtime behind.
        rknn_lite.release()
        raise RuntimeError(f'Init runtime failed: {ret}')
    _global_rknn_instance = rknn_lite
    print(f'[INFO] RKNN model loaded: {model_path}')
    return _global_rknn_instance
 # ------------------- 图像预处理 + ROI 裁剪 -------------------
 def preprocess(raw_image, target_size=(640, 640), roi=None):
    """Crop the region of interest, resize it and add a batch dimension.

    Args:
        raw_image: Image array indexed as ``[row, column, ...]``.
        target_size: Size tuple handed to ``cv2.resize``.
        roi: Optional ``(x, y, w, h)`` crop rectangle in pixels. When omitted
            the module-level ``ROI`` is used.

    Returns:
        The resized crop with a leading axis of length 1.

    Raises:
        ValueError: if ``raw_image`` is ``None`` or the ROI lies completely
            outside the image (the crop would be empty).
    """
    if raw_image is None:
        raise ValueError('raw_image is None')
    x, y, w, h = ROI if roi is None else roi
    roi_img = raw_image[y:y + h, x:x + w]
    # An empty crop would otherwise surface as an opaque cv2.resize failure.
    if roi_img.size == 0:
        raise ValueError(
            f'ROI {(x, y, w, h)} is outside the image of shape {raw_image.shape[:2]}')
    img_resized = cv2.resize(roi_img, target_size)
    return np.expand_dims(img_resized, 0)  # add the batch dimension
 # ------------------- 推理函数 -------------------
 def yolov11_cls_inference_once(rknn, raw_image, target_size=(640, 640)):
    """Classify one image with an already initialised RKNN runtime.

    Args:
        rknn: Object exposing ``inference(list_of_inputs)``, e.g. the instance
            returned by ``init_rknn_model``.
        raw_image: Image passed to ``preprocess`` (ROI crop + resize).
        target_size: Resize target forwarded to ``preprocess``.

    Returns:
        tuple: ``(class_id, bool_value)`` where ``class_id`` is the argmax of
        the flattened first output and ``bool_value`` is ``class_id == 1``.

    Raises:
        RuntimeError: if the runtime returns no output.
    """
    img = preprocess(raw_image, target_size)
    outputs = rknn.inference([img])
    # Fail with a clear message instead of a TypeError on ``outputs[0]``.
    if outputs is None or len(outputs) == 0:
        raise RuntimeError('RKNN inference returned no output')
    scores = np.asarray(outputs[0]).reshape(-1)
    class_id = int(np.argmax(scores))
    return class_id, class_id == 1
 # ------------------- 测试 -------------------
 if __name__ == '__main__':
    # Manual smoke test: classify one sample image and print the result.
    image_path = "./test_image/class1/2.jpg"
    # Pick the model build that matches the SoC reported by get_host();
    # every host other than RK3566/RK3568 keeps the previous RK3588 default.
    if get_host() == 'RK3566_RK3568':
        model_path = "cls_rk3568.rknn"
    else:
        model_path = "cls_rk3588.rknn"
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        raise RuntimeError(f"Failed to read image: {image_path}")
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    # Initialise the model once; further calls reuse the same instance.
    rknn_model = init_rknn_model(model_path)
    try:
        class_id, bool_value = yolov11_cls_inference_once(rknn_model, rgb_image)
        print(f"类别ID: {class_id}, 布尔值: {bool_value}")
    finally:
        # Release the runtime even when inference raises.
        rknn_model.release()
        _global_rknn_instance = None
--- a/cls_main/readme.md
+++ b/cls_main/readme.md
@ -0,0 +1,58 @@
 # yolov11_cls_inference README
 ## 概述
 该模块用于对米厂输入图像执行二分类推理，用于判断机械臂夹爪是否夹紧。
 类别定义：
 0 → 夹具夹紧 (False)
 1 → 夹具打开 (True)
 rknn模型只加载一次，复用全局实例，提高推理效率。
 ## 调用示例
 您可以直接调用 init_rknn_model 与 yolov11_cls_inference_once 函数，以便集成到其他项目中：
 示例 1: 单张图片推理
 ```python
 from main_cls import init_rknn_model, yolov11_cls_inference_once
 import cv2
 # 读取图像
 bgr_image = cv2.imread("11.jpg")
 rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
 # 加载模型（只在首次调用时真正加载），然后调用推理函数
 rknn_model = init_rknn_model("cls_rk3588.rknn")
 class_id, bool_value = yolov11_cls_inference_once(
    rknn_model,
    raw_image=rgb_image,
    target_size=(640, 640)
 )
 print(f"类别ID: {class_id}, 布尔值: {bool_value}")
 ```
 示例 2: 多次推理（复用模型）
 ```python
 from main_cls import init_rknn_model, yolov11_cls_inference_once
 import cv2
 # 初始化一次模型
 rknn_model = init_rknn_model("cls_rk3568.rknn")
 # 读取图像
 bgr_image = cv2.imread("12.jpg")
 rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
 # 使用已加载模型进行推理
 class_id, bool_value = yolov11_cls_inference_once(rknn_model, rgb_image)
 if bool_value:
    print("夹具打开")
 else:
    print("夹具夹紧")
 ```
--- a/cls_main/test_image/class0/1.jpg
+++ b/cls_main/test_image/class0/1.jpg
--- a/cls_main/test_image/class1/1.jpg
+++ b/cls_main/test_image/class1/1.jpg
--- a/cls_main/test_image/class1/2.jpg
+++ b/cls_main/test_image/class1/2.jpg
--- a/point_diff_main/README.md
+++ b/point_diff_main/README.md
@ -0,0 +1,94 @@
 # RKNN 关键点推理与偏移量计算工具
 该工具通过使用RKNN模型对输入图像进行关键点检测，并根据检测结果计算相对于固定参考点的偏移量（单位：毫米）。此外，还提供了可视化选项来展示计算结果。
 ## 目录结构
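 ├── calculate_diff.py      # 主程序脚本 1.0（仅关键点）
 ├── calculate_diff2.0.py   # 主程序脚本 2.0（关键点 + 检测框）
 ├── caculate_diff(可用设备树版本，测试，不用做推理).py  # 带设备树平台识别的测试脚本
 ├── point.rknn             # RKNN 模型文件 (请确保正确路径)
 └── README.md              # 说明文档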
 ## 配置
 在 `calculate_diff.py` / `calculate_diff2.0.py` 文件顶部的配置区中，您可以修改如下参数以适应您的需求：
 - **MODEL_PATH**: RKNN 模型文件路径。
 - **OUTPUT_DIR**: 输出目录路径。
 - **FIXED_REF_POINT**: 固定参考点坐标（像素）。
 - **SCALE_X**, **SCALE_Y**: 缩放因子，用于将像素坐标转换为毫米。
 - **IMG_SIZE**: 输入图像尺寸。
 ## 安装依赖
 请确保安装了必要的 Python 库。可以通过 pip 安装：
 ```bash
 pip install opencv-python numpy rknn-toolkit-lite2
 ```
 ## 函数调用1.0
 您也可以直接调用 calculate_offset_from_image 函数，以便集成到其他项目中：
 示例 1: 仅获取偏移量（不画图）
 ```python
 from calculate_diff import calculate_offset_from_image
 result = calculate_offset_from_image("your_image_path.jpg", visualize=False)
 if result['success']:
    print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm")
 else:
    print("Error:", result['message'])
 ```
 示例 2: 获取偏移量并保存可视化图
 ```python
 from calculate_diff import calculate_offset_from_image
 result = calculate_offset_from_image("your_image_path.jpg", visualize=True)
 ```
 ## 函数调用2.0
 示例 1: 仅获取偏移量（不画图）
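 ```python
 # 文件名 calculate_diff2.0.py 中含有 “.”，无法用普通 import 语句导入，需按文件路径加载
 import importlib.util
 spec = importlib.util.spec_from_file_location("calculate_diff2", "calculate_diff2.0.py")
 calculate_diff2 = importlib.util.module_from_spec(spec)
 spec.loader.exec_module(calculate_diff2)
 calculate_offset_from_image = calculate_diff2.calculate_offset_from_image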
 result = calculate_offset_from_image("11.jpg", visualize=False)
 if result['success']:
    print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm")
 else:
    print("Error:", result['message'])
 ```
 示例 2: 获取偏移量并保存可视化图
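 ```python
 # 文件名 calculate_diff2.0.py 中含有 “.”，无法用普通 import 语句导入，需按文件路径加载
 import importlib.util
 spec = importlib.util.spec_from_file_location("calculate_diff2", "calculate_diff2.0.py")
 calculate_diff2 = importlib.util.module_from_spec(spec)
 spec.loader.exec_module(calculate_diff2)
 calculate_offset_from_image = calculate_diff2.calculate_offset_from_image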
 result = calculate_offset_from_image("11.jpg", visualize=True)
 ```
 ## 该函数返回一个包含下列字段的字典（1.0）：
    success: 成功标志（True/False）
    dx_mm: 水平偏移（毫米）
    dy_mm: 垂直偏移（毫米）
    cx: 中心点 x 坐标（像素）
    cy: 中心点 y 坐标（像素）
    message: 错误信息或成功提示
 ## 该函数返回一个包含下列字段的字典（2.0）：
    success: 成功标志（True/False）
    dx_mm: 水平偏移（毫米）
    dy_mm: 垂直偏移（毫米）
    cx: 中心点 x 坐标（像素）
    cy: 中心点 y 坐标（像素）
    message: 错误信息或成功提示
    class_id: 检测类别 ID  #这里是bag的id是0
    obj_conf: 检测置信度   #这就是识别为料袋的置信度
    bbox: 检测矩形框 [x_left, y_top, width, height]
--- a/point_diff_main/caculate_diff(可用设备树版本，测试，不用做推理).py
+++ b/point_diff_main/caculate_diff(可用设备树版本，测试，不用做推理).py
@ -0,0 +1,256 @@
 # detect_fixed.py
 import cv2
 import numpy as np
 import os
 import platform
 from rknnlite.api import RKNNLite
 # ====================== Configuration ======================
 IMAGE_PATH = "11.jpg"  # test image
 MODEL_PATH = "point.rknn"
 OUTPUT_DIR = "./output_rknn"
 # Created at import time so visualisations can be written later.
 os.makedirs(OUTPUT_DIR, exist_ok=True)
 # Fixed reference point (pixels) and mm-per-pixel scale factors.
 FIXED_REF_POINT = (535, 605)
 # Calibration pairs: a length in millimetres and the same length in pixels.
 width_mm, width_px = 70.0, 42
 height_mm, height_px = 890.0, 507
 SCALE_X = width_mm / float(width_px)
 SCALE_Y = height_mm / float(height_px)
 print(f"[INFO] Scale factors: X={SCALE_X:.3f} mm/px, Y={SCALE_Y:.3f} mm/px")
 # Target size handed to letterbox_resize() as (width, height).
 IMG_SIZE = (640, 640)
 # Device-tree node used to detect the platform automatically.
 DEVICE_COMPATIBLE_NODE = '/proc/device-tree/compatible'
 def get_host():
    """Identify the platform this process is running on.

    On Linux/aarch64 the device-tree ``compatible`` node is read and searched
    for known SoC names; content matching none of them falls back to
    ``'RK3566_RK3568'``. On any other platform ``'<system>-<machine>'`` is
    returned.

    Returns:
        str: ``'RK3588'``, ``'RK3576'``, ``'RK3562'``, ``'RK3566_RK3568'`` or
        ``'<system>-<machine>'``.

    Raises:
        SystemExit: with code -1 when the device-tree node cannot be read.
    """
    system = platform.system()
    machine = platform.machine()
    if not (system == 'Linux' and machine == 'aarch64'):
        return f"{system}-{machine}"
    try:
        with open(DEVICE_COMPATIBLE_NODE, 'r') as f:
            compatible = f.read()
    except OSError as e:
        print(f"Read device tree failed: {e}")
        # Raise SystemExit directly: the bare ``exit`` helper is injected by
        # the ``site`` module and is not present in every interpreter setup.
        raise SystemExit(-1)
    # Checked in the same order as before so the first match still wins.
    for soc in ('rk3588', 'rk3576', 'rk3562'):
        if soc in compatible:
            return soc.upper()
    return 'RK3566_RK3568'
 def letterbox_resize(image, size, bg_color=114):
    """Resize an image keeping its aspect ratio and pad it to ``size``.

    Args:
        image: Source image; height and width are read from
            ``image.shape[:2]``. The padded canvas is created with 3 channels.
        size: ``(target_w, target_h)`` of the output canvas.
        bg_color: Fill value for the padding.

    Returns:
        tuple: ``(canvas, scale, dx, dy)`` - the padded uint8 image, the
        resize factor, and the left/top padding in pixels.
    """
    target_w, target_h = size
    h, w = image.shape[:2]
    scale = min(target_w / w, target_h / h)
    # Clamp to one pixel: for extreme aspect ratios the truncated size can
    # reach 0, which cv2.resize rejects.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    resized = cv2.resize(image, (new_w, new_h))
    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
    dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2
    canvas[dy:dy + new_h, dx:dx + new_w] = resized
    return canvas, scale, dx, dy
 def safe_sigmoid(x):
    """Return the logistic sigmoid of ``x`` after clipping it to [-50, 50].

    The clip keeps ``np.exp`` from overflowing for large negative inputs.
    """
    bounded = np.clip(x, -50, 50)
    denominator = 1.0 + np.exp(-bounded)
    return 1.0 / denominator
 def softmax(x):
    """Return the softmax of ``x`` computed over all of its elements.

    The maximum is subtracted first so the exponentials cannot overflow.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    total = weights.sum()
    return weights / total
 def dfl_to_xywh(loc, grid_x, grid_y, stride):
    """Decode one cell's DFL bins into a top-left ``[x, y, w, h]`` box.

    ``loc`` is read as four consecutive groups of 16 bins. Each group is
    softmax-normalised and reduced to its expected bin index, giving the
    distance from the cell centre ``(grid + 0.5)`` to the left, top, right
    and bottom edge in grid units. The result is scaled by ``stride``.

    Returns:
        np.ndarray: ``[x_left, y_top, width, height]``.
    """
    distances = []
    for side in range(4):
        probs = softmax(loc[side * 16:(side + 1) * 16])
        expected = sum(k * probs[k] for k in range(16))
        # Widen to double before the box arithmetic below.
        distances.append(float(expected))
    centre_x = grid_x + 0.5
    centre_y = grid_y + 0.5
    left = centre_x - distances[0]
    top = centre_y - distances[1]
    right = centre_x + distances[2]
    bottom = centre_y + distances[3]
    width = (right - left) * stride
    height = (bottom - top) * stride
    # Box centre minus half the extent gives the top-left corner.
    x_left = ((left + right) / 2) * stride - width / 2
    y_top = ((top + bottom) / 2) * stride - height / 2
    return np.array([x_left, y_top, width, height])
 def parse_pose_outputs(outputs, conf_threshold=0.5, dx=0, dy=0, scale=1.0):
    """Decode the strongest detection box and one keypoint set.

    ``outputs[0..2]`` are read as detection heads of shape (1, C, H, W): the
    first 64 channels feed ``dfl_to_xywh`` and the remaining ``C - 64``
    channels are per-class logits. ``outputs[3]`` is read as keypoint data
    indexed ``[0, keypoint, (x, y, visibility logit), anchor]``.

    Args:
        outputs: Sequence of model outputs as described above.
        conf_threshold: Minimum sigmoid class score for a cell to count.
        dx, dy: Letterbox padding removed from coordinates.
        scale: Letterbox resize factor divided out of coordinates.

    Returns:
        tuple: ``(keypoints, class_id, obj_conf, best_box)`` where
        ``keypoints`` is an array of ``[x, y, visibility]`` rows and
        ``best_box`` is the list ``[x, y, w, h]``. When no cell reaches the
        threshold ``class_id`` is -1 and ``obj_conf`` is 0.0.

    NOTE(review): the keypoint anchor is picked by mean visibility logit,
    independently of the winning detection cell - confirm this is intended.
    """
    best = None  # (conf, class_id, det, row, col, stride) of the best cell
    for idx in range(3):  # det head
        det = np.array(outputs[idx])[0]
        C, H, W = det.shape
        num_classes = C - 64
        # One vectorised sigmoid per head instead of one call per cell and
        # class. Transposing to (H, W, classes) keeps the previous
        # row -> column -> class scan order, so ties still resolve to the
        # first cell encountered.
        scores = safe_sigmoid(det[64:]).transpose(1, 2, 0).reshape(-1)
        if scores.size == 0:
            continue
        flat_idx = int(np.argmax(scores))
        conf = scores[flat_idx]
        if not conf >= conf_threshold:
            continue
        if best is None or conf > best[0]:
            row, rest = divmod(flat_idx, W * num_classes)
            col, cls = divmod(rest, num_classes)
            best = (conf, cls, det, row, col, 640 // H)
    if best is None:
        best_box = np.array([0, 0, 0, 0])
        class_id = -1
        obj_conf = 0.0
    else:
        # Only the winning cell is decoded; the other candidates were never
        # used by the caller.
        obj_conf, class_id, det, row, col, stride = best
        loc = det[:64, row, col].astype(np.float32)
        best_box = dfl_to_xywh(loc, col, row, stride)
    # Map the box from letterboxed coordinates back to the source image.
    x, y, w, h = best_box
    x = (x - dx) / scale
    y = (y - dy) / scale
    w /= scale
    h /= scale
    best_box = [x, y, w, h]
    kpt_output = np.array(outputs[3])[0]
    confs = kpt_output[:, 2, :]
    best_anchor_idx = np.argmax(np.mean(confs, axis=0))
    kpt_data = kpt_output[:, :, best_anchor_idx]
    keypoints = []
    for i in range(kpt_data.shape[0]):
        x_img, y_img, vis_raw = kpt_data[i]
        vis_prob = safe_sigmoid(vis_raw)
        x_orig = (x_img - dx) / scale
        y_orig = (y_img - dy) / scale
        keypoints.append([x_orig, y_orig, vis_prob])
    return np.array(keypoints), class_id, obj_conf, best_box
 def compute_offset(keypoints, fixed_point, scale_x, scale_y):
    """Return the midpoint of the first two keypoints and its scaled offset.

    Returns:
        ``(cx, cy, dx_mm, dy_mm)``, or ``None`` when fewer than two keypoints
        are supplied. The offset is the midpoint minus ``fixed_point``,
        multiplied by ``scale_x`` / ``scale_y``.
    """
    if len(keypoints) < 2:
        return None
    first, second = keypoints[0], keypoints[1]
    cx = (first[0] + second[0]) / 2
    cy = (first[1] + second[1]) / 2
    ref_x, ref_y = fixed_point[0], fixed_point[1]
    dx_mm = (cx - ref_x) * scale_x
    dy_mm = (cy - ref_y) * scale_y
    return cx, cy, dx_mm, dy_mm
 def visualize_result(image, keypoints, bbox, fixed_point, offset_info, save_path):
    """Draw keypoints, box, centre, reference point and offset; save the image.

    Drawing happens on a copy of ``image``, which is written to ``save_path``
    with ``cv2.imwrite``. Only the first two keypoints are drawn, and only
    when their confidence exceeds 0.5.
    """
    def to_px(px, py):
        return (int(px), int(py))

    canvas = image.copy()
    ref_x, ref_y = map(int, fixed_point)
    center_x, center_y, dx_mm, dy_mm = offset_info
    marker_colors = ((0, 0, 255), (0, 255, 255))
    for marker_color, (kx, ky, kconf) in zip(marker_colors, keypoints[:2]):
        if kconf > 0.5:
            cv2.circle(canvas, to_px(kx, ky), 6, marker_color, -1)
    if len(keypoints) >= 2:
        start = to_px(keypoints[0][0], keypoints[0][1])
        end = to_px(keypoints[1][0], keypoints[1][1])
        cv2.line(canvas, start, end, (0, 255, 0), 2)
    left, top, box_w, box_h = bbox
    cv2.rectangle(canvas, to_px(left, top), to_px(left + box_w, top + box_h), (255, 0, 0), 2)
    cv2.circle(canvas, to_px(center_x, center_y), 10, (0, 255, 0), 3)
    cv2.circle(canvas, (ref_x, ref_y), 12, (255, 255, 0), 3)
    cv2.arrowedLine(canvas, (ref_x, ref_y), to_px(center_x, center_y), (255, 255, 0), 2, tipLength=0.05)
    # The two offset labels sit above and below the reference point.
    captions = ((f"DeltaX={dx_mm:+.1f}mm", -30), (f"DeltaY={dy_mm:+.1f}mm", 30))
    for text, y_shift in captions:
        cv2.putText(canvas, text, (ref_x + 30, ref_y + y_shift),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
    cv2.imwrite(save_path, canvas)
 def main():
    """Run one end-to-end inference on ``IMAGE_PATH`` and print the offset.

    Loads ``MODEL_PATH``, initialises the runtime (pinning NPU core 0 on
    RK3576/RK3588 only), runs the model on the letterboxed image, decodes the
    outputs and writes an annotated copy into ``OUTPUT_DIR``.

    Raises:
        SystemExit: with the RKNN return code when loading the model or
            initialising the runtime fails.
    """
    host_name = get_host()
    print(f"[INFO] Running on: {host_name}")
    rknn = RKNNLite()
    # ---- Load Model ----
    ret = rknn.load_rknn(MODEL_PATH)
    if ret != 0:
        print("❌ Failed to load RKNN model!")
        raise SystemExit(ret)
    print("✅ Model loaded successfully.")
    # From here on every exit path, including early returns and exceptions,
    # must release the runtime - hence the single try/finally.
    try:
        # ---- Init Runtime ----
        if host_name in ['RK3576', 'RK3588']:
            ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
        else:
            ret = rknn.init_runtime()
        if ret != 0:
            print("❌ Init runtime failed!")
            raise SystemExit(ret)
        print("✅ Runtime initialized.")
        # ---- Preprocess ----
        ori_img = cv2.imread(IMAGE_PATH)
        if ori_img is None:
            print(f"❌ Cannot read image: {IMAGE_PATH}")
            return
        img_resized, scale, dx, dy = letterbox_resize(ori_img, IMG_SIZE)
        # Reverse the channel axis (BGR -> RGB) and add the batch axis.
        input_tensor = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8)
        # ---- Inference ----
        print("🔍 Starting inference...")
        outputs = rknn.inference(inputs=[input_tensor])
        print("✅ Inference completed.")
        # ---- Post-process ----
        try:
            keypoints, cls_id, obj_conf, bbox = parse_pose_outputs(
                outputs, dx=dx, dy=dy, scale=scale)
            offset_info = compute_offset(keypoints, FIXED_REF_POINT, SCALE_X, SCALE_Y)
            if offset_info is None:
                print("⚠️ Not enough keypoints detected.")
                return
            cx, cy, dx_mm, dy_mm = offset_info
            vis_save_path = os.path.join(OUTPUT_DIR, f"result_{os.path.basename(IMAGE_PATH)}")
            visualize_result(ori_img, keypoints, bbox, FIXED_REF_POINT, offset_info, vis_save_path)
            print(f"\n🎯 Detection Result:")
            print(f"Center: ({cx:.1f}, {cy:.1f})")
            print(f"Offset: ΔX={dx_mm:+.2f}mm, ΔY={dy_mm:+.2f}mm")
            print(f"Class: {cls_id}, Confidence: {obj_conf:.3f}")
            print(f"Saved result to: {vis_save_path}")
        except Exception as e:
            print(f"❌ Post-processing error: {e}")
            import traceback
            traceback.print_exc()
    finally:
        rknn.release()
 if __name__ == "__main__":
    main()
--- a/point_diff_main/calculate_diff.py
+++ b/point_diff_main/calculate_diff.py
@ -0,0 +1,230 @@
 import cv2
 import numpy as np
 import os
 from rknnlite.api import RKNNLite
 # ====================== Configuration ======================
 MODEL_PATH = "point.rknn"
 OUTPUT_DIR = "./output_rknn"
 # Created at import time so visualisations can be written later.
 os.makedirs(OUTPUT_DIR, exist_ok=True)
 # Fixed reference point (pixel coordinates).
 FIXED_REF_POINT = (535, 605)
 # mm/px scale factors (fill in from calibration data).
 width_mm = 70.0
 width_px = 42
 SCALE_X = width_mm / float(width_px)
 height_mm = 890.0
 height_px = 507
 SCALE_Y = height_mm / float(height_px)
 print(f"Scale factors: SCALE_X={SCALE_X:.3f} mm/px, SCALE_Y={SCALE_Y:.3f} mm/px")
 # Input size handed to letterbox_resize() as (width, height).
 IMG_SIZE = (640, 640)
 def letterbox_resize(image, size, bg_color=114):
    """Resize an image keeping its aspect ratio and pad it to ``size``.

    Args:
        image: Source image; height and width are read from
            ``image.shape[:2]``. The padded canvas is created with 3 channels.
        size: ``(target_w, target_h)`` of the output canvas.
        bg_color: Fill value for the padding.

    Returns:
        tuple: ``(canvas, scale, dx, dy)`` - the padded uint8 image, the
        resize factor, and the left/top padding in pixels.
    """
    target_w, target_h = size
    h, w = image.shape[:2]
    scale = min(target_w / w, target_h / h)
    # Clamp to one pixel: for extreme aspect ratios the truncated size can
    # reach 0, which cv2.resize rejects.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    resized = cv2.resize(image, (new_w, new_h))
    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
    dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2
    canvas[dy:dy + new_h, dx:dx + new_w] = resized
    return canvas, scale, dx, dy
 def safe_sigmoid(x):
    """Return the logistic sigmoid of ``x`` after clipping it to [-50, 50].

    The clip keeps ``np.exp`` from overflowing for large negative inputs.
    """
    bounded = np.clip(x, -50, 50)
    denominator = 1.0 + np.exp(-bounded)
    return 1.0 / denominator
 def parse_pose_outputs(outputs, dx=0, dy=0, scale=1.0):
    """Extract one keypoint set from the RKNN YOLO-Pose outputs.

    ``outputs[3]`` is read as an array indexed
    ``[0, keypoint, (x, y, visibility logit), anchor]``. The anchor with the
    highest mean visibility logit is selected and its keypoints are mapped
    back to source-image coordinates.

    Args:
        outputs: Sequence of model outputs; only index 3 is used.
        dx, dy: Letterbox padding removed from the coordinates.
        scale: Letterbox resize factor divided out of the coordinates.

    Returns:
        np.ndarray: One ``[x, y, visibility_probability]`` row per keypoint.
    """
    kpt_output = np.array(outputs[3])[0]
    confs = kpt_output[:, 2, :]  # visibility logit of every keypoint
    mean_conf_per_anchor = np.mean(confs, axis=0)
    best_anchor_idx = np.argmax(mean_conf_per_anchor)
    kpt_data = kpt_output[:, :, best_anchor_idx]
    keypoints = []
    # Iterate over however many keypoints the model emits rather than a
    # hard-coded count of four.
    for i in range(kpt_data.shape[0]):
        x_img = kpt_data[i, 0]
        y_img = kpt_data[i, 1]
        vis_prob = safe_sigmoid(kpt_data[i, 2])
        # Map back to source-image coordinates.
        x_orig = (x_img - dx) / scale
        y_orig = (y_img - dy) / scale
        keypoints.append([x_orig, y_orig, vis_prob])
    return np.array(keypoints)
 def compute_offset(keypoints, fixed_point, scale_x, scale_y):
    """Return the midpoint of keypoints 0 and 1 and its scaled offset.

    Returns:
        ``(center_x, center_y, dx_mm, dy_mm)``, or ``None`` when fewer than
        two keypoints are supplied. The offset is the midpoint minus
        ``fixed_point``, multiplied by ``scale_x`` / ``scale_y``.
    """
    if len(keypoints) < 2:
        return None
    first, second = keypoints[0], keypoints[1]
    cx = (first[0] + second[0]) / 2.0
    cy = (first[1] + second[1]) / 2.0
    ref_x, ref_y = fixed_point[0], fixed_point[1]
    return cx, cy, (cx - ref_x) * scale_x, (cy - ref_y) * scale_y
 def visualize_result(image, keypoints, fixed_point, offset_info, save_path):
    """Draw keypoints, centre, reference point and offset; save the image.

    Drawing happens on a copy of ``image``, which is written to ``save_path``
    with ``cv2.imwrite``. Keypoints are drawn only when their confidence
    exceeds 0.5.

    Args:
        image: Source image to annotate.
        keypoints: Iterable of ``(x, y, conf)`` rows.
        fixed_point: Reference point ``(x, y)`` in pixels.
        offset_info: ``(cx, cy, dx_mm, dy_mm)`` as returned by
            ``compute_offset``.
        save_path: Destination file path.
    """
    vis = image.copy()
    colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0)]
    cx, cy, dx_mm, dy_mm = offset_info
    fx, fy = map(int, fixed_point)
    # Keypoints
    for i, (x, y, conf) in enumerate(keypoints):
        if conf > 0.5:
            # Cycle the palette so more than four keypoints cannot index
            # past its end.
            color = colors[i % len(colors)]
            cv2.circle(vis, (int(x), int(y)), 8, color, -1)
            cv2.putText(vis, f"P{i}", (int(x) + 10, int(y) - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
    # Centre point
    cv2.circle(vis, (int(cx), int(cy)), 12, (0, 255, 0), 3)
    cv2.putText(vis, "Center", (int(cx) + 20, int(cy)),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
    # Reference point
    cv2.circle(vis, (fx, fy), 15, (255, 255, 0), 3)
    cv2.putText(vis, "Ref", (fx + 20, fy),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 3)
    # Offset arrow and labels
    cv2.arrowedLine(vis, (fx, fy), (int(cx), int(cy)), (0, 255, 255), 3, tipLength=0.05)
    cv2.putText(vis, f"DeltaX={dx_mm:+.1f}mm", (fx + 40, fy - 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 3)
    cv2.putText(vis, f"DeltaY={dy_mm:+.1f}mm", (fx + 40, fy + 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 3)
    cv2.imwrite(save_path, vis)
 def _failure_result(message):
    """Build the failure dictionary returned by calculate_offset_from_image."""
    return {
        'success': False,
        'dx_mm': None, 'dy_mm': None, 'cx': None, 'cy': None,
        'message': message
    }
 def calculate_offset_from_image(image_path, visualize=False):
    """Compute the offset (mm) of the detected centre from the reference point.

    The model is loaded, run once on the letterboxed image and released
    again on every call.

    Args:
        image_path (str): Path of the input image.
        visualize (bool): When true, an annotated image is written to
            ``OUTPUT_DIR`` as ``result_<basename>``.

    Returns:
        dict: ``{'success', 'dx_mm', 'dy_mm', 'cx', 'cy', 'message'}``. On
        failure ``success`` is False, the numeric fields are None and
        ``message`` describes the error.
    """
    # Read the image.
    orig = cv2.imread(image_path)
    if orig is None:
        return _failure_result(f'Failed to load image: {image_path}')
    # Preprocess: letterbox, reverse the channel axis, add the batch axis.
    img_resized, scale, dx, dy = letterbox_resize(orig, IMG_SIZE)
    infer_img = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8)
    # Load the model and run inference.
    rknn = RKNNLite(verbose=False)
    ret = rknn.load_rknn(MODEL_PATH)
    if ret != 0:
        return _failure_result('Failed to load RKNN model')
    try:
        ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
        if ret != 0:
            return _failure_result(f'Failed to init RKNN runtime: {ret}')
        outputs = rknn.inference([infer_img])
    except Exception as e:
        return _failure_result(f'Inference error: {str(e)}')
    finally:
        # Single release point: runs for success, early return and exception.
        rknn.release()
    if outputs is None:
        return _failure_result('Inference error: no outputs returned')
    # Decode the keypoints.
    try:
        keypoints = parse_pose_outputs(outputs, dx=dx, dy=dy, scale=scale)
    except Exception as e:
        return _failure_result(f'Parse keypoint error: {str(e)}')
    # Compute the offset.
    offset_info = compute_offset(keypoints, FIXED_REF_POINT, SCALE_X, SCALE_Y)
    if offset_info is None:
        return _failure_result('Not enough keypoints to compute offset')
    cx, cy, dx_mm, dy_mm = offset_info
    # Optional visualisation.
    if visualize:
        vis_save_path = os.path.join(OUTPUT_DIR, f"result_{os.path.basename(image_path)}")
        visualize_result(orig, keypoints, FIXED_REF_POINT, offset_info, vis_save_path)
    return {
        'success': True,
        'dx_mm': dx_mm,
        'dy_mm': dy_mm,
        'cx': cx,
        'cy': cy,
        'message': 'Success'
    }
 # ====================== 使用示例 ======================
 if __name__ == "__main__":
    # Demo run: process one sample image and save the annotated result.
    image_path = "11.jpg"
    result = calculate_offset_from_image(image_path, visualize=True)
    if not result['success']:
        print("Error:", result['message'])
    else:
        print(f"Center point: ({result['cx']:.1f}, {result['cy']:.1f})")
        print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm")
--- a/point_diff_main/calculate_diff2.0.py
+++ b/point_diff_main/calculate_diff2.0.py
@ -0,0 +1,236 @@
 import cv2
 import numpy as np
 import os
 from rknnlite.api import RKNNLite
 # ====================== Configuration ======================
 MODEL_PATH = "point.rknn"
 OUTPUT_DIR = "./output_rknn"
 # Created at import time so visualisations can be written later.
 os.makedirs(OUTPUT_DIR, exist_ok=True)
 # Fixed reference point (pixel coordinates).
 FIXED_REF_POINT = (535, 605)
 # mm/px scale factors (fill in from calibration data).
 width_mm = 70.0
 width_px = 42
 SCALE_X = width_mm / float(width_px)
 height_mm = 890.0
 height_px = 507
 SCALE_Y = height_mm / float(height_px)
 print(f"Scale factors: SCALE_X={SCALE_X:.3f} mm/px, SCALE_Y={SCALE_Y:.3f} mm/px")
 # Input size handed to letterbox_resize() as (width, height).
 IMG_SIZE = (640, 640)
 def letterbox_resize(image, size, bg_color=114):
    """Resize an image keeping its aspect ratio and pad it to ``size``.

    Args:
        image: Source image; height and width are read from
            ``image.shape[:2]``. The padded canvas is created with 3 channels.
        size: ``(target_w, target_h)`` of the output canvas.
        bg_color: Fill value for the padding.

    Returns:
        tuple: ``(canvas, scale, dx, dy)`` - the padded uint8 image, the
        resize factor, and the left/top padding in pixels.
    """
    target_w, target_h = size
    h, w = image.shape[:2]
    scale = min(target_w / w, target_h / h)
    # Clamp to one pixel: for extreme aspect ratios the truncated size can
    # reach 0, which cv2.resize rejects.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    resized = cv2.resize(image, (new_w, new_h))
    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
    dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2
    canvas[dy:dy + new_h, dx:dx + new_w] = resized
    return canvas, scale, dx, dy
 def safe_sigmoid(x):
    """Return the logistic sigmoid of ``x`` after clipping it to [-50, 50].

    The clip keeps ``np.exp`` from overflowing for large negative inputs.
    """
    bounded = np.clip(x, -50, 50)
    denominator = 1.0 + np.exp(-bounded)
    return 1.0 / denominator
 def softmax(x):
    """Return the softmax of ``x`` computed over all of its elements.

    The maximum is subtracted first so the exponentials cannot overflow.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    total = weights.sum()
    return weights / total
 def dfl_to_xywh(loc, grid_x, grid_y, stride):
    """Decode one cell's DFL bins into a top-left ``[x, y, w, h]`` box.

    ``loc`` is read as four consecutive groups of 16 bins. Each group is
    softmax-normalised and reduced to its expected bin index, giving the
    distance from the cell centre ``(grid + 0.5)`` to the left, top, right
    and bottom edge in grid units. The result is scaled by ``stride``.

    Returns:
        np.ndarray: ``[x_left, y_top, width, height]``.
    """
    distances = []
    for side in range(4):
        probs = softmax(loc[side * 16:(side + 1) * 16])
        expected = sum([k * probs[k] for k in range(16)])
        # Widen to double before the box arithmetic below.
        distances.append(float(expected))
    centre_x = grid_x + 0.5
    centre_y = grid_y + 0.5
    # Edge positions in grid units.
    left = centre_x - distances[0]
    top = centre_y - distances[1]
    right = centre_x + distances[2]
    bottom = centre_y + distances[3]
    # Extent in input pixels.
    width = (right - left) * stride
    height = (bottom - top) * stride
    # Box centre minus half the extent gives the top-left corner.
    x_left = ((left + right) / 2) * stride - width / 2
    y_top = ((top + bottom) / 2) * stride - height / 2
    return np.array([x_left, y_top, width, height])
 def parse_pose_outputs(outputs, conf_threshold=0.5, dx=0, dy=0, scale=1.0):
    """Decode the strongest detection box and one keypoint set.

    ``outputs[0..2]`` are read as detection heads of shape (1, C, H, W): the
    first 64 channels feed ``dfl_to_xywh`` and the remaining ``C - 64``
    channels are per-class logits. ``outputs[3]`` is read as keypoint data
    indexed ``[0, keypoint, (x, y, visibility logit), anchor]``.

    Args:
        outputs: Sequence of model outputs as described above.
        conf_threshold: Minimum sigmoid class score for a cell to count.
        dx, dy: Letterbox padding removed from coordinates.
        scale: Letterbox resize factor divided out of coordinates.

    Returns:
        tuple: ``(keypoints, class_id, obj_conf, best_box)``, all mapped back
        to source-image coordinates. ``keypoints`` holds
        ``[x, y, visibility]`` rows and ``best_box`` is an array
        ``[x, y, w, h]``. When no cell reaches the threshold ``class_id`` is
        -1 and ``obj_conf`` is 0.0.

    NOTE(review): the keypoint anchor is picked by mean visibility logit,
    independently of the winning detection cell - confirm this is intended.
    """
    best = None  # (conf, class_id, det, row, col, stride) of the best cell
    # The first three outputs are the detection heads.
    for idx in range(3):
        det = np.array(outputs[idx])[0]  # (C, H, W)
        C, H, W = det.shape
        num_classes = C - 64  # the first 64 channels are DFL box bins
        # One vectorised sigmoid per head instead of one call per cell and
        # class. Transposing to (H, W, classes) keeps the previous
        # row -> column -> class scan order, so ties still resolve to the
        # first cell encountered.
        scores = safe_sigmoid(det[64:]).transpose(1, 2, 0).reshape(-1)
        if scores.size == 0:
            continue
        flat_idx = int(np.argmax(scores))
        conf = scores[flat_idx]
        if not conf >= conf_threshold:
            continue
        if best is None or conf > best[0]:
            row, rest = divmod(flat_idx, W * num_classes)
            col, cls = divmod(rest, num_classes)
            best = (conf, cls, det, row, col, 640 // H)
    if best is None:
        best_box = np.array([0, 0, 0, 0])
        class_id = -1
        obj_conf = 0.0
    else:
        # Only the winning cell is decoded; the other candidates were never
        # used by the caller.
        obj_conf, class_id, det, row, col, stride = best
        loc = det[:64, row, col].astype(np.float32)
        best_box = dfl_to_xywh(loc, col, row, stride)
    # Map the box from letterboxed coordinates back to the source image.
    x, y, w, h = best_box
    x = (x - dx) / scale
    y = (y - dy) / scale
    w = w / scale
    h = h / scale
    best_box = np.array([x, y, w, h])
    # Keypoints
    kpt_output = np.array(outputs[3])[0]  # (num_kpts, 3, num_anchor)
    confs = kpt_output[:, 2, :]
    best_anchor_idx = np.argmax(np.mean(confs, axis=0))
    kpt_data = kpt_output[:, :, best_anchor_idx]
    keypoints = []
    for i in range(kpt_data.shape[0]):
        x_img, y_img, vis_conf_raw = kpt_data[i]
        vis_prob = safe_sigmoid(vis_conf_raw)
        x_orig = (x_img - dx) / scale
        y_orig = (y_img - dy) / scale
        keypoints.append([x_orig, y_orig, vis_prob])
    return np.array(keypoints), class_id, obj_conf, best_box
 def compute_offset(keypoints, fixed_point, scale_x, scale_y):
    """Return the midpoint of the first two keypoints and its scaled offset.

    Returns:
        ``(cx, cy, dx_mm, dy_mm)``, or ``None`` when fewer than two keypoints
        are supplied. The offset is the midpoint minus ``fixed_point``,
        multiplied by ``scale_x`` / ``scale_y``.
    """
    if len(keypoints) < 2:
        return None
    first, second = keypoints[0], keypoints[1]
    ref_x, ref_y = fixed_point[0], fixed_point[1]
    cx = (first[0] + second[0]) / 2.0
    cy = (first[1] + second[1]) / 2.0
    offset_x_mm = (cx - ref_x) * scale_x
    offset_y_mm = (cy - ref_y) * scale_y
    return cx, cy, offset_x_mm, offset_y_mm
 def visualize_result(image, keypoints, bbox, fixed_point, offset_info, save_path):
    """Draw keypoints, box, centre, reference point and offset; save the image.

    Drawing happens on a copy of ``image``, which is written to ``save_path``
    with ``cv2.imwrite``. Only the first two keypoints are drawn, and only
    when their confidence exceeds 0.5.
    """
    def to_px(px, py):
        return (int(px), int(py))

    canvas = image.copy()
    marker_colors = [(0, 0, 255), (0, 255, 255)]
    center_x, center_y, dx_mm, dy_mm = offset_info
    ref_x, ref_y = map(int, fixed_point)
    # Keypoint markers
    for marker_color, (kx, ky, kconf) in zip(marker_colors, keypoints[:2]):
        if kconf > 0.5:
            cv2.circle(canvas, to_px(kx, ky), 6, marker_color, -1)
    # Segment joining the first two keypoints
    if len(keypoints) >= 2:
        start = to_px(keypoints[0][0], keypoints[0][1])
        end = to_px(keypoints[1][0], keypoints[1][1])
        cv2.line(canvas, start, end, (0, 255, 0), 2)
    # Detection box
    left, top, box_w, box_h = bbox
    cv2.rectangle(canvas, to_px(left, top), to_px(left + box_w, top + box_h), (255, 0, 0), 2)
    # Centre, reference point and the arrow between them
    cv2.circle(canvas, to_px(center_x, center_y), 10, (0, 255, 0), 3)
    cv2.circle(canvas, (ref_x, ref_y), 12, (255, 255, 0), 3)
    cv2.arrowedLine(canvas, (ref_x, ref_y), to_px(center_x, center_y), (255, 255, 0), 2, tipLength=0.05)
    # The two offset labels sit above and below the reference point.
    captions = ((f"DeltaX={dx_mm:+.1f}mm", -30), (f"DeltaY={dy_mm:+.1f}mm", 30))
    for text, y_shift in captions:
        cv2.putText(canvas, text, (ref_x + 30, ref_y + y_shift),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
    cv2.imwrite(save_path, canvas)
 def _failure_result(message):
    """Build the failure dictionary returned by calculate_offset_from_image."""
    return {'success': False, 'dx_mm': None, 'dy_mm': None,
            'cx': None, 'cy': None, 'class_id': None,
            'obj_conf': None, 'bbox': None,
            'message': message}
 def calculate_offset_from_image(image_path, visualize=False):
    """Compute the offset (mm) of the detected centre from the reference point.

    The model is loaded, run once on the letterboxed image and released
    again on every call.

    Args:
        image_path (str): Path of the input image.
        visualize (bool): When true, an annotated image is written to
            ``OUTPUT_DIR`` as ``result_<basename>``.

    Returns:
        dict: Always contains ``success``, ``dx_mm``, ``dy_mm``, ``cx``,
        ``cy``, ``class_id``, ``obj_conf``, ``bbox`` and ``message``. On
        failure ``success`` is False, ``message`` describes the error and
        every other field is None.

    NOTE(review): ``success`` is still True when no detection reaches the
    confidence threshold (``class_id == -1``) - confirm callers expect that.
    """
    orig = cv2.imread(image_path)
    if orig is None:
        return _failure_result(f'Failed to load image: {image_path}')
    img_resized, scale, dx, dy = letterbox_resize(orig, IMG_SIZE)
    # Reverse the channel axis and add the batch axis.
    infer_img = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8)
    rknn = RKNNLite(verbose=False)
    ret = rknn.load_rknn(MODEL_PATH)
    if ret != 0:
        return _failure_result('Failed to load RKNN model')
    try:
        ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
        if ret != 0:
            return _failure_result(f'Failed to init RKNN runtime: {ret}')
        outputs = rknn.inference([infer_img])
    except Exception as e:
        # Report runtime failures through the result dict, like every other
        # failure path of this function.
        return _failure_result(f'Inference error: {str(e)}')
    finally:
        rknn.release()
    if outputs is None:
        return _failure_result('Inference error: no outputs returned')
    try:
        keypoints, class_id, obj_conf, bbox = parse_pose_outputs(outputs, dx=dx, dy=dy, scale=scale)
    except Exception as e:
        return _failure_result(f'Parse error: {str(e)}')
    offset_info = compute_offset(keypoints, FIXED_REF_POINT, SCALE_X, SCALE_Y)
    if offset_info is None:
        return _failure_result('Not enough keypoints')
    cx, cy, dx_mm, dy_mm = offset_info
    if visualize:
        vis_save_path = os.path.join(OUTPUT_DIR, f"result_{os.path.basename(image_path)}")
        visualize_result(orig, keypoints, bbox, FIXED_REF_POINT, offset_info, vis_save_path)
    return {'success': True, 'dx_mm': dx_mm, 'dy_mm': dy_mm,
            'cx': cx, 'cy': cy, 'class_id': class_id,
            'obj_conf': obj_conf, 'bbox': bbox,
            'message': 'Success'}
 # ====================== 使用示例 ======================
 if __name__ == "__main__":
    # Demo run: process one sample image and save the annotated result.
    image_path = "11.jpg"
    result = calculate_offset_from_image(image_path, visualize=True)
    if not result['success']:
        print("Error:", result['message'])
    else:
        print(f"Center point: ({result['cx']:.1f}, {result['cy']:.1f})")
        print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm")
        print(f"Class ID: {result['class_id']}, Confidence: {result['obj_conf']:.3f}")
        print(f"BBox: {result['bbox']}")
--- a/point_diff_main/point.rknn
+++ b/point_diff_main/point.rknn