# detect_fixed.py
"""Run a pose model on an RKNN NPU and report a keypoint-centre offset.

The script loads ``MODEL_PATH``, runs it on ``IMAGE_PATH``, takes the midpoint
of the first two keypoints, and reports its offset from ``FIXED_REF_POINT``
converted to millimetres with ``SCALE_X`` / ``SCALE_Y``.  An annotated copy of
the image is written to ``OUTPUT_DIR``.
"""
import os
import platform
import sys
import traceback

import cv2
import numpy as np
from rknnlite.api import RKNNLite

# ====================== Configuration ======================
IMAGE_PATH = "11.jpg"  # test image
MODEL_PATH = "point.rknn"
OUTPUT_DIR = "./output_rknn"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Fixed reference point & scale factors (mm per pixel)
FIXED_REF_POINT = (535, 605)
width_mm, width_px = 70.0, 42
height_mm, height_px = 890.0, 507
SCALE_X = width_mm / float(width_px)
SCALE_Y = height_mm / float(height_px)
print(f"[INFO] Scale factors: X={SCALE_X:.3f} mm/px, Y={SCALE_Y:.3f} mm/px")

IMG_SIZE = (640, 640)  # (width, height) fed to letterbox_resize

# Device-tree path (used to auto-detect the platform)
DEVICE_COMPATIBLE_NODE = '/proc/device-tree/compatible'

# Substrings looked up in the device-tree string, checked in this order.
_KNOWN_SOCS = (
    ('rk3588', 'RK3588'),
    ('rk3576', 'RK3576'),
    ('rk3562', 'RK3562'),
)

# Each box side is encoded as a distribution over this many bins.
_DFL_BINS = 16
_BOX_CHANNELS = 4 * _DFL_BINS


def get_host():
    """Return a name for the platform the script is running on.

    On Linux/aarch64 the device-tree ``compatible`` node is read and mapped to
    ``'RK3588'``, ``'RK3576'``, ``'RK3562'`` or, when none of those match,
    ``'RK3566_RK3568'``.  On any other platform ``"<system>-<machine>"`` is
    returned.  If the device-tree node cannot be read the process exits with
    status -1.
    """
    system = platform.system()
    machine = platform.machine()
    if system != 'Linux' or machine != 'aarch64':
        return f"{system}-{machine}"

    try:
        with open(DEVICE_COMPATIBLE_NODE, 'r') as f:
            compatible = f.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"Read device tree failed: {e}")
        sys.exit(-1)

    for marker, name in _KNOWN_SOCS:
        if marker in compatible:
            return name
    return 'RK3566_RK3568'


def letterbox_resize(image, size, bg_color=114):
    """Resize ``image`` into ``size`` keeping aspect ratio, padding the rest.

    Args:
        image: H x W x 3 uint8 image.
        size: ``(target_w, target_h)`` of the output canvas.
        bg_color: value used to fill the padding.

    Returns:
        ``(canvas, scale, dx, dy)`` where ``scale`` is the resize factor and
        ``(dx, dy)`` is the top-left offset of the resized image in the canvas.
    """
    target_w, target_h = size
    h, w = image.shape[:2]
    scale = min(target_w / w, target_h / h)
    # Clamp to 1 px: a very elongated image would otherwise round down to a
    # zero-sized target, which cv2.resize rejects.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    resized = cv2.resize(image, (new_w, new_h))

    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
    dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2
    canvas[dy:dy + new_h, dx:dx + new_w] = resized
    return canvas, scale, dx, dy


def safe_sigmoid(x):
    """Return the logistic sigmoid of ``x``, clipping to [-50, 50] first."""
    x = np.clip(x, -50, 50)
    return 1.0 / (1.0 + np.exp(-x))


def softmax(x, axis=None):
    """Return the softmax of ``x``.

    With ``axis=None`` (the default) the softmax is taken over all elements;
    otherwise it is taken along ``axis``.
    """
    x = np.asarray(x)
    keepdims = axis is not None
    # Subtract the max before exponentiating for numerical stability.
    e = np.exp(x - np.max(x, axis=axis, keepdims=keepdims))
    return e / e.sum(axis=axis, keepdims=keepdims)


def dfl_to_xywh(loc, grid_x, grid_y, stride):
    """Decode one cell's 4 x 16 distribution logits into a box.

    Args:
        loc: at least 64 logits; the first 64 are read as four consecutive
            groups of 16 (left, top, right, bottom distances, in grid cells).
        grid_x, grid_y: cell column and row.
        stride: pixels per grid cell.

    Returns:
        ``np.ndarray`` ``[x, y, w, h]`` in input-image pixels, where ``(x, y)``
        is the top-left corner of the box.
    """
    bins = np.asarray(loc, dtype=np.float32)[:_BOX_CHANNELS]
    probs = softmax(bins.reshape(4, _DFL_BINS), axis=1)
    # Expected bin index per side = predicted distance from the cell centre.
    left, top, right, bottom = probs @ np.arange(_DFL_BINS, dtype=np.float64)
    return np.array([
        (grid_x + 0.5 - left) * stride,
        (grid_y + 0.5 - top) * stride,
        (left + right) * stride,
        (top + bottom) * stride,
    ])


def parse_pose_outputs(outputs, conf_threshold=0.5, dx=0, dy=0, scale=1.0,
                       input_size=640):
    """Decode the model outputs into keypoints and the best detection.

    ``outputs[0..2]`` are read as detection heads indexed
    ``[0][channel, h, w]``: the first 64 channels are box distribution logits,
    the remaining ones are per-class logits.  ``outputs[3]`` is read as
    ``[0][keypoint, (x, y, visibility_logit), anchor]``.

    Args:
        outputs: sequence of at least four model outputs as described above.
        conf_threshold: minimum class probability for a cell to count.
        dx, dy, scale: letterbox offset and factor, used to map coordinates
            back to the original image.
        input_size: model input height in pixels, used to derive each head's
            stride.

    Returns:
        ``(keypoints, class_id, obj_conf, best_box)``.  ``keypoints`` is a
        ``(K, 3)`` array of ``[x, y, visibility_probability]`` in original
        image coordinates.  ``best_box`` is ``[x, y, w, h]`` (top-left based)
        of the highest-scoring detection; when nothing passes the threshold it
        is all zeros with ``class_id == -1`` and ``obj_conf == 0.0``.
    """
    boxes = []
    obj_confs = []
    class_ids = []

    for idx in range(3):  # detection heads
        det = np.asarray(outputs[idx])[0]
        stride = input_size // det.shape[1]
        scores = safe_sigmoid(det[_BOX_CHANNELS:])  # (num_classes, H, W)
        # Scanning the (H, W, C) view keeps hits in row, column, class order,
        # so ties in the argmax below resolve to the earliest cell.
        hits = np.argwhere(scores.transpose(1, 2, 0) >= conf_threshold)
        for h, w, c in hits:
            loc = det[:_BOX_CHANNELS, h, w].astype(np.float32)
            boxes.append(dfl_to_xywh(loc, int(w), int(h), stride))
            obj_confs.append(float(scores[c, h, w]))
            class_ids.append(int(c))

    if obj_confs:
        best = int(np.argmax(obj_confs))
        x, y, w, h = boxes[best]
        # Undo the letterbox transform.
        best_box = [(x - dx) / scale, (y - dy) / scale, w / scale, h / scale]
        class_id = class_ids[best]
        obj_conf = obj_confs[best]
    else:
        # Keep the placeholder untouched: mapping it through the letterbox
        # transform would yield a meaningless negative-origin box.
        best_box = [0.0, 0.0, 0.0, 0.0]
        class_id = -1
        obj_conf = 0.0

    kpt_output = np.asarray(outputs[3])[0]
    # NOTE(review): the keypoint anchor is chosen by highest mean visibility
    # logit, independently of the best box above -- confirm this is intended
    # rather than using the anchor of the winning detection.
    best_anchor_idx = np.argmax(np.mean(kpt_output[:, 2, :], axis=0))
    kpt_data = kpt_output[:, :, best_anchor_idx].astype(np.float64)

    keypoints = np.stack(
        [
            (kpt_data[:, 0] - dx) / scale,
            (kpt_data[:, 1] - dy) / scale,
            safe_sigmoid(kpt_data[:, 2]),
        ],
        axis=1,
    )
    return keypoints, class_id, obj_conf, best_box


def compute_offset(keypoints, fixed_point, scale_x, scale_y):
    """Return the offset of the first two keypoints' midpoint.

    Args:
        keypoints: sequence of ``[x, y, ...]`` entries.
        fixed_point: ``(x, y)`` reference point in pixels.
        scale_x, scale_y: millimetres per pixel along each axis.

    Returns:
        ``(cx, cy, dx_mm, dy_mm)`` -- the midpoint in pixels and its offset
        from ``fixed_point`` in millimetres -- or ``None`` when fewer than two
        keypoints are given.
    """
    if len(keypoints) < 2:
        return None
    p1, p2 = keypoints[0], keypoints[1]
    cx = (p1[0] + p2[0]) / 2
    cy = (p1[1] + p2[1]) / 2
    dx_mm = (cx - fixed_point[0]) * scale_x
    dy_mm = (cy - fixed_point[1]) * scale_y
    return cx, cy, dx_mm, dy_mm


def visualize_result(image, keypoints, bbox, fixed_point, offset_info,
                     save_path):
    """Draw the detection on a copy of ``image`` and write it to ``save_path``.

    Draws the first two keypoints (only those with confidence above 0.5), the
    segment joining them, the bounding box, the midpoint, the reference point,
    an arrow from reference to midpoint and the offset in millimetres.

    Returns:
        ``True`` if the image was written, ``False`` if ``cv2.imwrite``
        reported failure.
    """
    vis = image.copy()
    fx, fy = map(int, fixed_point)
    cx, cy, dx_mm, dy_mm = offset_info
    center = (int(cx), int(cy))

    for i, (x, y, conf) in enumerate(keypoints[:2]):
        if conf > 0.5:
            color = (0, 0, 255) if i == 0 else (0, 255, 255)
            cv2.circle(vis, (int(x), int(y)), 6, color, -1)

    if len(keypoints) >= 2:
        cv2.line(vis,
                 (int(keypoints[0][0]), int(keypoints[0][1])),
                 (int(keypoints[1][0]), int(keypoints[1][1])),
                 (0, 255, 0), 2)

    x, y, w, h = bbox
    if w > 0 and h > 0:  # skip the empty placeholder used for "no detection"
        cv2.rectangle(vis, (int(x), int(y)), (int(x + w), int(y + h)),
                      (255, 0, 0), 2)

    cv2.circle(vis, center, 10, (0, 255, 0), 3)
    cv2.circle(vis, (fx, fy), 12, (255, 255, 0), 3)
    cv2.arrowedLine(vis, (fx, fy), center, (255, 255, 0), 2, tipLength=0.05)

    cv2.putText(vis, f"DeltaX={dx_mm:+.1f}mm", (fx + 30, fy - 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
    cv2.putText(vis, f"DeltaY={dy_mm:+.1f}mm", (fx + 30, fy + 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)

    # cv2.imwrite signals failure through its return value, not an exception.
    return bool(cv2.imwrite(save_path, vis))


def main():
    """Load the model, run it on ``IMAGE_PATH`` and report the offset."""
    host_name = get_host()
    print(f"[INFO] Running on: {host_name}")

    rknn = RKNNLite()
    # Everything below runs under try/finally so the runtime is released on
    # every exit path, including early returns and sys.exit().
    try:
        # ---- Load Model ----
        ret = rknn.load_rknn(MODEL_PATH)
        if ret != 0:
            print("❌ Failed to load RKNN model!")
            sys.exit(ret)
        print("✅ Model loaded successfully.")

        # ---- Init Runtime ----
        if host_name in ['RK3576', 'RK3588']:
            ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
        else:
            ret = rknn.init_runtime()
        if ret != 0:
            print("❌ Init runtime failed!")
            sys.exit(ret)
        print("✅ Runtime initialized.")

        # ---- Preprocess ----
        ori_img = cv2.imread(IMAGE_PATH)
        if ori_img is None:
            print(f"❌ Cannot read image: {IMAGE_PATH}")
            return

        img_resized, scale, dx, dy = letterbox_resize(ori_img, IMG_SIZE)
        # Reverse the channel axis (BGR -> RGB) and add a batch dimension.
        input_tensor = np.expand_dims(
            img_resized[..., ::-1], 0).astype(np.uint8)

        # ---- Inference ----
        print("🔍 Starting inference...")
        outputs = rknn.inference(inputs=[input_tensor])
        if outputs is None:
            print("❌ Inference failed!")
            return
        print("✅ Inference completed.")

        # ---- Post-process ----
        try:
            keypoints, cls_id, obj_conf, bbox = parse_pose_outputs(
                outputs, dx=dx, dy=dy, scale=scale, input_size=IMG_SIZE[1])

            offset_info = compute_offset(
                keypoints, FIXED_REF_POINT, SCALE_X, SCALE_Y)
            if offset_info is None:
                print("⚠️ Not enough keypoints detected.")
                return

            cx, cy, dx_mm, dy_mm = offset_info

            vis_save_path = os.path.join(
                OUTPUT_DIR, f"result_{os.path.basename(IMAGE_PATH)}")
            saved = visualize_result(ori_img, keypoints, bbox,
                                     FIXED_REF_POINT, offset_info,
                                     vis_save_path)

            print("\n🎯 Detection Result:")
            print(f"Center: ({cx:.1f}, {cy:.1f})")
            print(f"Offset: ΔX={dx_mm:+.2f}mm, ΔY={dy_mm:+.2f}mm")
            print(f"Class: {cls_id}, Confidence: {obj_conf:.3f}")
            if saved:
                print(f"Saved result to: {vis_save_path}")
            else:
                print(f"❌ Failed to write result image: {vis_save_path}")

        except Exception as e:
            print(f"❌ Post-processing error: {e}")
            traceback.print_exc()
    finally:
        rknn.release()


if __name__ == "__main__":
    main()