Files
ailai_image_point_diff/point_save/calculate_diff.py
2026-01-08 17:25:14 +08:00

230 lines
7.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import cv2
import numpy as np
import os
from rknnlite.api import RKNNLite

# ====================== Configuration ======================
# Path of the compiled RKNN pose model and directory for visualization output.
MODEL_PATH = "point.rknn"
OUTPUT_DIR = "./output_rknn"
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Fixed reference point (pixel coordinates in the original image)
FIXED_REF_POINT = (535, 605)
# mm/px scale factors (fill in from calibration data)
width_mm = 70.0
width_px = 42
SCALE_X = width_mm / float(width_px)   # horizontal mm per pixel
height_mm = 890.0
height_px = 507
SCALE_Y = height_mm / float(height_px)  # vertical mm per pixel
print(f"Scale factors: SCALE_X={SCALE_X:.3f} mm/px, SCALE_Y={SCALE_Y:.3f} mm/px")
# Model input size as (width, height)
IMG_SIZE = (640, 640)
def letterbox_resize(image, size, bg_color=114):
    """Resize with preserved aspect ratio and pad to the target size.

    Returns (canvas, scale, dx, dy): the padded image, the resize factor,
    and the left/top padding offsets needed to map model-space coordinates
    back onto the source image.
    """
    target_w, target_h = size
    src_h, src_w = image.shape[:2]
    ratio = min(target_w / src_w, target_h / src_h)
    scaled_w = int(src_w * ratio)
    scaled_h = int(src_h * ratio)
    shrunk = cv2.resize(image, (scaled_w, scaled_h))
    # Center the resized image on a solid-color canvas.
    pad_x = (target_w - scaled_w) // 2
    pad_y = (target_h - scaled_h) // 2
    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
    canvas[pad_y:pad_y + scaled_h, pad_x:pad_x + scaled_w] = shrunk
    return canvas, ratio, pad_x, pad_y
def safe_sigmoid(x):
    """Numerically stable sigmoid: clamps input to [-50, 50] first so
    np.exp cannot overflow for extreme logits."""
    clamped = np.clip(x, -50, 50)
    return 1.0 / (np.exp(-clamped) + 1.0)
def parse_pose_outputs(outputs, dx=0, dy=0, scale=1.0):
    """Decode the RKNN YOLO-Pose keypoint tensor.

    outputs[3]: shape (1, 4, 3, 8400) -> [kpt_id, (x, y, conf), anchor]
    Selects the anchor with the highest mean keypoint visibility and maps
    its four keypoints back to original-image coordinates using the
    letterbox offsets (dx, dy) and resize scale.

    Returns an ndarray of shape (4, 3): [x_orig, y_orig, visibility_prob].
    """
    raw = np.array(outputs[3])[0]              # (4, 3, 8400)
    visibility = raw[:, 2, :]                  # visible_conf of each keypoint
    anchor_scores = np.mean(visibility, axis=0)  # mean visibility per anchor
    best = np.argmax(anchor_scores)
    chosen = raw[:, :, best]                   # (4, 3): x, y, vis_conf
    decoded = []
    for row in chosen:
        x_img, y_img, vis_raw = row[0], row[1], row[2]
        # Stable sigmoid on the raw visibility logit.
        vis_prob = 1.0 / (1.0 + np.exp(-np.clip(vis_raw, -50, 50)))
        # Undo letterbox padding and resize to recover source coordinates.
        decoded.append([(x_img - dx) / scale, (y_img - dy) / scale, vis_prob])
    return np.array(decoded)
def compute_offset(keypoints, fixed_point, scale_x, scale_y):
    """Offset (in mm) of the detected center from a fixed reference point.

    The center is the midpoint of keypoints P0 and P1.
    Returns (center_x, center_y, dx_mm, dy_mm), or None when fewer than
    two keypoints are available.
    """
    if len(keypoints) < 2:
        return None
    first, second = keypoints[0], keypoints[1]
    center_x = 0.5 * (first[0] + second[0])
    center_y = 0.5 * (first[1] + second[1])
    # Pixel offset from the reference, converted to mm via calibration.
    offset_x_mm = (center_x - fixed_point[0]) * scale_x
    offset_y_mm = (center_y - fixed_point[1]) * scale_y
    return center_x, center_y, offset_x_mm, offset_y_mm
def visualize_result(image, keypoints, fixed_point, offset_info, save_path):
    """Draw keypoints, reference point, center point, the offset arrow and
    the offset labels, then write the annotated image to save_path."""
    canvas = image.copy()
    palette = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0)]
    cx, cy, dx_mm, dy_mm = offset_info
    ref_x, ref_y = int(fixed_point[0]), int(fixed_point[1])
    center = (int(cx), int(cy))
    # Keypoints: only draw those with visibility above 0.5.
    for idx, (kx, ky, kconf) in enumerate(keypoints):
        if kconf > 0.5:
            pt = (int(kx), int(ky))
            cv2.circle(canvas, pt, 8, palette[idx], -1)
            cv2.putText(canvas, f"P{idx}", (pt[0] + 10, pt[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, palette[idx], 2)
    # Center point.
    cv2.circle(canvas, center, 12, (0, 255, 0), 3)
    cv2.putText(canvas, "Center", (center[0] + 20, center[1]),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
    # Fixed reference point.
    cv2.circle(canvas, (ref_x, ref_y), 15, (255, 255, 0), 3)
    cv2.putText(canvas, "Ref", (ref_x + 20, ref_y),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 3)
    # Arrow from reference to center, plus the mm offset labels.
    cv2.arrowedLine(canvas, (ref_x, ref_y), center, (0, 255, 255), 3,
                    tipLength=0.05)
    cv2.putText(canvas, f"DeltaX={dx_mm:+.1f}mm", (ref_x + 40, ref_y - 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 3)
    cv2.putText(canvas, f"DeltaY={dy_mm:+.1f}mm", (ref_x + 40, ref_y + 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 3)
    cv2.imwrite(save_path, canvas)
def _failure(message):
    """Build the standard error-result dict for calculate_offset_from_image."""
    return {
        'success': False,
        'dx_mm': None, 'dy_mm': None, 'cx': None, 'cy': None,
        'message': message
    }


def calculate_offset_from_image(image_path, visualize=False):
    """Main entry: given an image path, compute the center-point offset
    (in mm) from FIXED_REF_POINT.

    Parameters:
        image_path (str): path of the input image.
        visualize (bool): when True, save an annotated result image
            into OUTPUT_DIR.

    Returns:
        dict: {
            'success': bool,
            'dx_mm': float or None,
            'dy_mm': float or None,
            'cx': float or None,   # center point x (px)
            'cy': float or None,   # center point y (px)
            'message': str
        }
    """
    # Read the image.
    orig = cv2.imread(image_path)
    if orig is None:
        return _failure(f'Failed to load image: {image_path}')
    # Preprocess: letterbox to model input size, BGR -> RGB, add batch dim.
    img_resized, scale, dx, dy = letterbox_resize(orig, IMG_SIZE)
    infer_img = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8)
    # Load the model and run inference.
    rknn = RKNNLite(verbose=False)
    ret = rknn.load_rknn(MODEL_PATH)
    if ret != 0:
        # Fix: release the handle even when loading fails (was leaked before).
        rknn.release()
        return _failure('Failed to load RKNN model')
    try:
        rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
        outputs = rknn.inference([infer_img])
    except Exception as e:
        # Fix: no explicit release here — the finally clause below already
        # releases the runtime (the old code released it twice on error).
        return _failure(f'Inference error: {str(e)}')
    finally:
        rknn.release()
    # Decode keypoints from the raw model output.
    try:
        keypoints = parse_pose_outputs(outputs, dx=dx, dy=dy, scale=scale)
    except Exception as e:
        return _failure(f'Parse keypoint error: {str(e)}')
    # Convert the detected center into a mm offset from the fixed reference.
    offset_info = compute_offset(keypoints, FIXED_REF_POINT, SCALE_X, SCALE_Y)
    if offset_info is None:
        return _failure('Not enough keypoints to compute offset')
    cx, cy, dx_mm, dy_mm = offset_info
    # Optional visualization.
    if visualize:
        vis_save_path = os.path.join(OUTPUT_DIR, f"result_{os.path.basename(image_path)}")
        visualize_result(orig, keypoints, FIXED_REF_POINT, offset_info, vis_save_path)
    return {
        'success': True,
        'dx_mm': dx_mm,
        'dy_mm': dy_mm,
        'cx': cx,
        'cy': cy,
        'message': 'Success'
    }
# ====================== Usage example ======================
if __name__ == "__main__":
    result = calculate_offset_from_image("11.jpg", visualize=True)
    if not result['success']:
        print("Error:", result['message'])
    else:
        print(f"Center point: ({result['cx']:.1f}, {result['cy']:.1f})")
        print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm")