# Source: ailai_image_point_diff/point_diff_main/calculate_diff2.0.py
# (237 lines, 7.5 KiB, Python — header reconstructed from repository page chrome)
# NOTE: the original page flagged "ambiguous Unicode characters"; verify string
# literals and comments if behavior around text output looks wrong.
import cv2
import numpy as np
import os
from rknnlite.api import RKNNLite
# ====================== Configuration ======================
MODEL_PATH = "point.rknn"
OUTPUT_DIR = "./output_rknn"
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Fixed reference point (pixel coordinates in the original image)
FIXED_REF_POINT = (535, 605)
# mm/px scale factors (fill in from calibration data)
width_mm = 70.0
width_px = 42
SCALE_X = width_mm / float(width_px)
height_mm = 890.0
height_px = 507
SCALE_Y = height_mm / float(height_px)
print(f"Scale factors: SCALE_X={SCALE_X:.3f} mm/px, SCALE_Y={SCALE_Y:.3f} mm/px")
# Network input size (width, height)
IMG_SIZE = (640, 640)
def letterbox_resize(image, size, bg_color=114):
    """Aspect-preserving resize of *image* onto a padded canvas of *size*.

    Returns (canvas, scale, dx, dy): the padded image, the resize factor,
    and the top-left offset of the resized content inside the canvas —
    everything needed to map detections back to original coordinates.
    """
    out_w, out_h = size
    src_h, src_w = image.shape[:2]
    ratio = min(out_w / src_w, out_h / src_h)
    fit_w = int(src_w * ratio)
    fit_h = int(src_h * ratio)
    scaled = cv2.resize(image, (fit_w, fit_h))
    # Gray (114) padding, content centered.
    canvas = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)
    off_x = (out_w - fit_w) // 2
    off_y = (out_h - fit_h) // 2
    canvas[off_y:off_y + fit_h, off_x:off_x + fit_w] = scaled
    return canvas, ratio, off_x, off_y
def safe_sigmoid(x):
    """Logistic sigmoid with the input clipped to [-50, 50] to avoid overflow."""
    bounded = np.clip(x, -50, 50)
    return 1.0 / (1.0 + np.exp(-bounded))
def softmax(x):
    """Softmax over the whole array, max-shifted for numerical stability."""
    exps = np.exp(x - np.max(x))
    total = exps.sum()
    return exps / total
def dfl_to_xywh(loc, grid_x, grid_y, stride):
    """Decode DFL (Distribution Focal Loss) logits into an xywh box.

    Args:
        loc: 64 logits = 4 sides x 16 bins (left, top, right, bottom distances).
        grid_x, grid_y: cell coordinates in the feature map.
        stride: feature-map stride relative to the 640x640 network input.

    Returns:
        np.ndarray [x, y, w, h] with (x, y) the TOP-LEFT corner in input pixels.

    Improvement over the original: the per-bin Python loop and the call to the
    module-level softmax are replaced by a single vectorized expectation over
    the (4, 16) bin matrix — same math, self-contained, and much faster.
    """
    logits = np.asarray(loc, dtype=np.float32).reshape(4, 16)
    # Per-side softmax (max-shifted for stability), then E[bin index].
    logits = logits - logits.max(axis=1, keepdims=True)
    probs = np.exp(logits)
    probs /= probs.sum(axis=1, keepdims=True)
    dist = probs @ np.arange(16, dtype=np.float32)
    # Corner coordinates in grid units: cell center +/- predicted distances.
    x1 = (grid_x + 0.5) - dist[0]
    y1 = (grid_y + 0.5) - dist[1]
    x2 = (grid_x + 0.5) + dist[2]
    y2 = (grid_y + 0.5) + dist[3]
    # Center/size in input pixels, then convert to top-left corner form.
    cx = (x1 + x2) / 2 * stride
    cy = (y1 + y2) / 2 * stride
    w = (x2 - x1) * stride
    h = (y2 - y1) * stride
    return np.array([cx - w / 2, cy - h / 2, w, h])
def parse_pose_outputs(outputs, conf_threshold=0.5, dx=0, dy=0, scale=1.0):
    """Parse RKNN YOLO-Pose outputs and map results back to the original image.

    Args:
        outputs: model outputs; outputs[0..2] are detection tensors of shape
            (1, 64 + num_classes, H, W) — first 64 channels are DFL box logits,
            the rest per-class logits. outputs[3] is the keypoint tensor,
            assumed (1, num_kpts, 3, num_anchors) with (x, y, vis-logit) rows
            — TODO confirm layout against the exported model.
        conf_threshold: minimum sigmoid class score to keep a candidate.
        dx, dy, scale: letterbox offsets/factor used to undo the resize.

    Returns:
        (keypoints, class_id, obj_conf, bbox): keypoints as (N, 3) array of
        [x, y, vis_prob]; bbox as [x, y, w, h] top-left form. When nothing
        clears the threshold: class_id = -1, obj_conf = 0.0, zero bbox.
    """
    boxes = []
    obj_confs = []
    class_ids = []
    # Scan the three detection scales. PERF: the original iterated every
    # (class, h, w) cell in Python calling safe_sigmoid per scalar; here the
    # confidence map is computed once per scale and only above-threshold
    # candidates are decoded.
    for idx in range(3):
        det = np.array(outputs[idx])[0]  # (C, H, W)
        C, H, W = det.shape
        num_classes = C - 64  # first 64 channels are the DFL box distribution
        stride = 640 // H
        conf_map = safe_sigmoid(det[64:, :, :].astype(np.float32))
        for c, h, w in np.argwhere(conf_map >= conf_threshold):
            loc = det[:64, h, w].astype(np.float32)
            boxes.append(dfl_to_xywh(loc, w, h, stride))
            obj_confs.append(conf_map[c, h, w])
            class_ids.append(int(c))
    if obj_confs:
        best = int(np.argmax(obj_confs))
        best_box = boxes[best]
        class_id = class_ids[best]
        obj_conf = float(obj_confs[best])
    else:
        best_box = np.array([0, 0, 0, 0])
        class_id = -1
        obj_conf = 0.0
    # Map the bbox from letterboxed 640x640 space back to the original image.
    x, y, w, h = best_box
    best_box = np.array([(x - dx) / scale, (y - dy) / scale,
                         w / scale, h / scale])
    # Keypoints: choose the anchor whose mean visibility logit is highest.
    # NOTE(review): anchor selection is independent of the chosen bbox above —
    # confirm this matches the model's head pairing.
    kpt_output = np.array(outputs[3])[0]  # (num_kpts, 3, num_anchors)
    vis_logits = kpt_output[:, 2, :]
    best_anchor = int(np.argmax(np.mean(vis_logits, axis=0)))
    kpt_data = kpt_output[:, :, best_anchor]
    keypoints = []
    for kx, ky, vis_raw in kpt_data:
        keypoints.append([(kx - dx) / scale,
                          (ky - dy) / scale,
                          safe_sigmoid(vis_raw)])
    return np.array(keypoints), class_id, obj_conf, best_box
def compute_offset(keypoints, fixed_point, scale_x, scale_y):
    """Midpoint of the first two keypoints and its mm offset from fixed_point.

    Returns (cx, cy, dx_mm, dy_mm), or None when fewer than two keypoints.
    """
    if len(keypoints) < 2:
        return None
    first, second = keypoints[0], keypoints[1]
    mid_x = (first[0] + second[0]) / 2.0
    mid_y = (first[1] + second[1]) / 2.0
    # Pixel offset from the fixed reference, converted to millimeters.
    off_x_mm = (mid_x - fixed_point[0]) * scale_x
    off_y_mm = (mid_y - fixed_point[1]) * scale_y
    return mid_x, mid_y, off_x_mm, off_y_mm
def visualize_result(image, keypoints, bbox, fixed_point, offset_info, save_path):
    """Draw keypoints, bbox, midpoint and the offset arrow on a copy of
    *image*, then write the annotated frame to *save_path*."""
    canvas = image.copy()
    kpt_colors = ((0, 0, 255), (0, 255, 255))
    mid_x, mid_y, off_x_mm, off_y_mm = offset_info
    ref_x, ref_y = int(fixed_point[0]), int(fixed_point[1])
    # First two keypoints, drawn only when confidently visible.
    for color, (px, py, conf) in zip(kpt_colors, keypoints[:2]):
        if conf > 0.5:
            cv2.circle(canvas, (int(px), int(py)), 6, color, -1)
    if len(keypoints) >= 2:
        p0 = (int(keypoints[0][0]), int(keypoints[0][1]))
        p1 = (int(keypoints[1][0]), int(keypoints[1][1]))
        cv2.line(canvas, p0, p1, (0, 255, 0), 2)
    # Bounding box (top-left + size form).
    bx, by, bw, bh = bbox
    cv2.rectangle(canvas, (int(bx), int(by)),
                  (int(bx + bw), int(by + bh)), (255, 0, 0), 2)
    # Midpoint, fixed reference point, and the arrow between them.
    cv2.circle(canvas, (int(mid_x), int(mid_y)), 10, (0, 255, 0), 3)
    cv2.circle(canvas, (ref_x, ref_y), 12, (255, 255, 0), 3)
    cv2.arrowedLine(canvas, (ref_x, ref_y), (int(mid_x), int(mid_y)),
                    (255, 255, 0), 2, tipLength=0.05)
    cv2.putText(canvas, f"DeltaX={off_x_mm:+.1f}mm", (ref_x + 30, ref_y - 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
    cv2.putText(canvas, f"DeltaY={off_y_mm:+.1f}mm", (ref_x + 30, ref_y + 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
    cv2.imwrite(save_path, canvas)
def calculate_offset_from_image(image_path, visualize=False):
    """Run the RKNN pose model on *image_path* and compute the mm offset of
    the keypoint midpoint from FIXED_REF_POINT.

    Args:
        image_path: path to the input image (read with cv2.imread).
        visualize: when True, write an annotated image into OUTPUT_DIR.

    Returns:
        dict with 'success' (bool) and 'message'; on success also dx_mm,
        dy_mm, cx, cy, class_id, obj_conf, bbox.
    """
    orig = cv2.imread(image_path)
    if orig is None:
        return {'success': False, 'message': f'Failed to load image: {image_path}'}
    # Letterbox to the 640x640 network input; keep the transform parameters
    # so detections can be mapped back to original-image coordinates.
    img_resized, scale, dx, dy = letterbox_resize(orig, IMG_SIZE)
    # BGR -> RGB and add the batch dimension.
    infer_img = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8)
    rknn = RKNNLite(verbose=False)
    ret = rknn.load_rknn(MODEL_PATH)
    if ret != 0:
        return {'success': False, 'message': 'Failed to load RKNN model'}
    try:
        # BUGFIX: the original ignored init_runtime()'s return code and ran
        # inference even when runtime initialization had failed.
        ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
        if ret != 0:
            return {'success': False, 'message': 'Failed to init RKNN runtime'}
        outputs = rknn.inference([infer_img])
    finally:
        rknn.release()
    try:
        keypoints, class_id, obj_conf, bbox = parse_pose_outputs(
            outputs, dx=dx, dy=dy, scale=scale)
    except Exception as e:
        return {'success': False, 'message': f'Parse error: {str(e)}'}
    offset_info = compute_offset(keypoints, FIXED_REF_POINT, SCALE_X, SCALE_Y)
    if offset_info is None:
        return {'success': False, 'message': 'Not enough keypoints'}
    cx, cy, dx_mm, dy_mm = offset_info
    if visualize:
        vis_save_path = os.path.join(OUTPUT_DIR, f"result_{os.path.basename(image_path)}")
        visualize_result(orig, keypoints, bbox, FIXED_REF_POINT, offset_info, vis_save_path)
    return {'success': True, 'dx_mm': dx_mm, 'dy_mm': dy_mm,
            'cx': cx, 'cy': cy, 'class_id': class_id,
            'obj_conf': obj_conf, 'bbox': bbox,
            'message': 'Success'}
# ====================== Usage example ======================
if __name__ == "__main__":
    image_path = "11.jpg"
    result = calculate_offset_from_image(image_path, visualize=True)
    if result['success']:
        print(f"Center point: ({result['cx']:.1f}, {result['cy']:.1f})")
        print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm")
        print(f"Class ID: {result['class_id']}, Confidence: {result['obj_conf']:.3f}")
        print(f"BBox: {result['bbox']}")
    else:
        print("Error:", result['message'])