Files
ailai_image_point_diff/point_save/calculate_diff.py
2026-01-08 17:25:14 +08:00

230 lines
7.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import cv2
import numpy as np
import os
from rknnlite.api import RKNNLite

# ====================== Configuration ======================
# Path of the compiled RKNN pose model and directory for visualization output.
MODEL_PATH = "point.rknn"
OUTPUT_DIR = "./output_rknn"
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Fixed reference point (pixel coordinates in the original image)
FIXED_REF_POINT = (535, 605)
# mm/px scale factors (fill in from calibration data)
width_mm = 70.0
width_px = 42
SCALE_X = width_mm / float(width_px)   # horizontal mm per pixel
height_mm = 890.0
height_px = 507
SCALE_Y = height_mm / float(height_px)  # vertical mm per pixel
print(f"Scale factors: SCALE_X={SCALE_X:.3f} mm/px, SCALE_Y={SCALE_Y:.3f} mm/px")
# Model input size as (width, height)
IMG_SIZE = (640, 640)
def letterbox_resize(image, size, bg_color=114):
    """Resize with preserved aspect ratio and pad to the target size.

    Returns (canvas, scale, dx, dy): the padded image, the resize factor,
    and the left/top padding offsets needed to map model-space coordinates
    back onto the source image.
    """
    target_w, target_h = size
    src_h, src_w = image.shape[:2]
    ratio = min(target_w / src_w, target_h / src_h)
    scaled_w = int(src_w * ratio)
    scaled_h = int(src_h * ratio)
    shrunk = cv2.resize(image, (scaled_w, scaled_h))
    # Center the resized image on a solid-color canvas.
    pad_x = (target_w - scaled_w) // 2
    pad_y = (target_h - scaled_h) // 2
    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
    canvas[pad_y:pad_y + scaled_h, pad_x:pad_x + scaled_w] = shrunk
    return canvas, ratio, pad_x, pad_y
def safe_sigmoid(x):
    """Numerically stable sigmoid: clamps input to [-50, 50] first so
    np.exp cannot overflow for extreme logits."""
    clamped = np.clip(x, -50, 50)
    return 1.0 / (np.exp(-clamped) + 1.0)
def parse_pose_outputs(outputs, dx=0, dy=0, scale=1.0):
    """Decode the RKNN YOLO-Pose keypoint tensor.

    outputs[3]: shape (1, 4, 3, 8400) -> [kpt_id, (x, y, conf), anchor]
    Selects the anchor with the highest mean keypoint visibility and maps
    its four keypoints back to original-image coordinates using the
    letterbox offsets (dx, dy) and resize scale.

    Returns an ndarray of shape (4, 3): [x_orig, y_orig, visibility_prob].
    """
    raw = np.array(outputs[3])[0]              # (4, 3, 8400)
    visibility = raw[:, 2, :]                  # visible_conf of each keypoint
    anchor_scores = np.mean(visibility, axis=0)  # mean visibility per anchor
    best = np.argmax(anchor_scores)
    chosen = raw[:, :, best]                   # (4, 3): x, y, vis_conf
    decoded = []
    for row in chosen:
        x_img, y_img, vis_raw = row[0], row[1], row[2]
        # Stable sigmoid on the raw visibility logit.
        vis_prob = 1.0 / (1.0 + np.exp(-np.clip(vis_raw, -50, 50)))
        # Undo letterbox padding and resize to recover source coordinates.
        decoded.append([(x_img - dx) / scale, (y_img - dy) / scale, vis_prob])
    return np.array(decoded)
def compute_offset(keypoints, fixed_point, scale_x, scale_y):
    """Offset (in mm) of the detected center from a fixed reference point.

    The center is the midpoint of keypoints P0 and P1.
    Returns (center_x, center_y, dx_mm, dy_mm), or None when fewer than
    two keypoints are available.
    """
    if len(keypoints) < 2:
        return None
    first, second = keypoints[0], keypoints[1]
    center_x = 0.5 * (first[0] + second[0])
    center_y = 0.5 * (first[1] + second[1])
    # Pixel offset from the reference, converted to mm via calibration.
    offset_x_mm = (center_x - fixed_point[0]) * scale_x
    offset_y_mm = (center_y - fixed_point[1]) * scale_y
    return center_x, center_y, offset_x_mm, offset_y_mm
def visualize_result(image, keypoints, fixed_point, offset_info, save_path):
    """Draw keypoints, reference point, center point, the offset arrow and
    the offset labels, then write the annotated image to save_path."""
    canvas = image.copy()
    palette = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0)]
    cx, cy, dx_mm, dy_mm = offset_info
    ref_x, ref_y = int(fixed_point[0]), int(fixed_point[1])
    center = (int(cx), int(cy))
    # Keypoints: only draw those with visibility above 0.5.
    for idx, (kx, ky, kconf) in enumerate(keypoints):
        if kconf > 0.5:
            pt = (int(kx), int(ky))
            cv2.circle(canvas, pt, 8, palette[idx], -1)
            cv2.putText(canvas, f"P{idx}", (pt[0] + 10, pt[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, palette[idx], 2)
    # Center point.
    cv2.circle(canvas, center, 12, (0, 255, 0), 3)
    cv2.putText(canvas, "Center", (center[0] + 20, center[1]),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
    # Fixed reference point.
    cv2.circle(canvas, (ref_x, ref_y), 15, (255, 255, 0), 3)
    cv2.putText(canvas, "Ref", (ref_x + 20, ref_y),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 3)
    # Arrow from reference to center, plus the mm offset labels.
    cv2.arrowedLine(canvas, (ref_x, ref_y), center, (0, 255, 255), 3,
                    tipLength=0.05)
    cv2.putText(canvas, f"DeltaX={dx_mm:+.1f}mm", (ref_x + 40, ref_y - 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 3)
    cv2.putText(canvas, f"DeltaY={dy_mm:+.1f}mm", (ref_x + 40, ref_y + 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 3)
    cv2.imwrite(save_path, canvas)
def _failure(message):
    """Build the standard error-result dict for calculate_offset_from_image."""
    return {
        'success': False,
        'dx_mm': None, 'dy_mm': None, 'cx': None, 'cy': None,
        'message': message
    }


def calculate_offset_from_image(image_path, visualize=False):
    """Main entry: given an image path, compute the center-point offset
    (in mm) from FIXED_REF_POINT.

    Parameters:
        image_path (str): path of the input image.
        visualize (bool): when True, save an annotated result image
            into OUTPUT_DIR.

    Returns:
        dict: {
            'success': bool,
            'dx_mm': float or None,
            'dy_mm': float or None,
            'cx': float or None,   # center point x (px)
            'cy': float or None,   # center point y (px)
            'message': str
        }
    """
    # Read the image.
    orig = cv2.imread(image_path)
    if orig is None:
        return _failure(f'Failed to load image: {image_path}')
    # Preprocess: letterbox to model input size, BGR -> RGB, add batch dim.
    img_resized, scale, dx, dy = letterbox_resize(orig, IMG_SIZE)
    infer_img = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8)
    # Load the model and run inference.
    rknn = RKNNLite(verbose=False)
    ret = rknn.load_rknn(MODEL_PATH)
    if ret != 0:
        # Fix: release the handle even when loading fails (was leaked before).
        rknn.release()
        return _failure('Failed to load RKNN model')
    try:
        rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
        outputs = rknn.inference([infer_img])
    except Exception as e:
        # Fix: no explicit release here — the finally clause below already
        # releases the runtime (the old code released it twice on error).
        return _failure(f'Inference error: {str(e)}')
    finally:
        rknn.release()
    # Decode keypoints from the raw model output.
    try:
        keypoints = parse_pose_outputs(outputs, dx=dx, dy=dy, scale=scale)
    except Exception as e:
        return _failure(f'Parse keypoint error: {str(e)}')
    # Convert the detected center into a mm offset from the fixed reference.
    offset_info = compute_offset(keypoints, FIXED_REF_POINT, SCALE_X, SCALE_Y)
    if offset_info is None:
        return _failure('Not enough keypoints to compute offset')
    cx, cy, dx_mm, dy_mm = offset_info
    # Optional visualization.
    if visualize:
        vis_save_path = os.path.join(OUTPUT_DIR, f"result_{os.path.basename(image_path)}")
        visualize_result(orig, keypoints, FIXED_REF_POINT, offset_info, vis_save_path)
    return {
        'success': True,
        'dx_mm': dx_mm,
        'dy_mm': dy_mm,
        'cx': cx,
        'cy': cy,
        'message': 'Success'
    }
# ====================== Usage example ======================
if __name__ == "__main__":
    result = calculate_offset_from_image("11.jpg", visualize=True)
    if not result['success']:
        print("Error:", result['message'])
    else:
        print(f"Center point: ({result['cx']:.1f}, {result['cy']:.1f})")
        print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm")