diff --git a/ailai_pc/point_error_test.py b/ailai_pc/point_error_test.py index e2024a2..c738808 100644 --- a/ailai_pc/point_error_test.py +++ b/ailai_pc/point_error_test.py @@ -6,8 +6,8 @@ from ultralytics import YOLO # ====================== 用户配置 ====================== #MODEL_PATH = '/home/hx/yolo/ultralytics_yolo11-main/runs/train/exp_ailai2/weights/best.pt' MODEL_PATH = 'point.pt' -IMAGE_SOURCE_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251208' # 验证集图片目录 -LABEL_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251208' # 标签目录(与图片同名 .txt) +IMAGE_SOURCE_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251212' # 验证集图片目录 +LABEL_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251212' # 标签目录(与图片同名 .txt) OUTPUT_DIR = './output_images' diff --git a/ailai_pc/point_test.py b/ailai_pc/point_test.py index 098215d..504f43e 100644 --- a/ailai_pc/point_test.py +++ b/ailai_pc/point_test.py @@ -5,7 +5,7 @@ import os # ====================== 用户配置 ====================== MODEL_PATH = '/home/hx/yolo/ultralytics_yolo11-main/runs/train/exp_ailai/weights/best.pt' -IMAGE_SOURCE_DIR = '/home/hx/开发/ailai_image_obb/ailai_pc/test' # 👈 修改为你的图像文件夹路径 +IMAGE_SOURCE_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251212' # 👈 修改为你的图像文件夹路径 OUTPUT_DIR = './output_images' # 保存结果的文件夹 # 支持的图像扩展名 diff --git a/ailai_pc/pointn.pt b/ailai_pc/pointn.pt new file mode 100644 index 0000000..28d0483 Binary files /dev/null and b/ailai_pc/pointn.pt differ diff --git a/point_diff_main/calculate_diff3.0.py b/point_diff_main/calculate_diff3.0.py new file mode 100644 index 0000000..0a3e2ad --- /dev/null +++ b/point_diff_main/calculate_diff3.0.py @@ -0,0 +1,224 @@ +import cv2 +import numpy as np +import os +from rknnlite.api import RKNNLite + +# ====================== 配置区 ====================== + +MODEL_PATH = "point.rknn" +OUTPUT_DIR = "./output_rknn" +os.makedirs(OUTPUT_DIR, exist_ok=True) + +# 固定参考点(像素坐标) +FIXED_REF_POINT = 
(535, 605)  # (x, y) value of the FIXED_REF_POINT assignment begun on the previous line

# mm-per-pixel scale factors (fill in from calibration data)
width_mm = 70.0
width_px = 42
SCALE_X = width_mm / float(width_px)
height_mm = 890.0
height_px = 507
SCALE_Y = height_mm / float(height_px)
print(f"Scale factors: SCALE_X={SCALE_X:.3f} mm/px, SCALE_Y={SCALE_Y:.3f} mm/px")

# Model input size as (width, height)
IMG_SIZE = (640, 640)

# ====================== RKNN singleton management ======================
_rknn_instance = None


def init_rknn_once(model_path):
    """Load the RKNN model and initialise its runtime at most once.

    Args:
        model_path: Path of the ``.rknn`` model file.

    Returns:
        The shared ``RKNNLite`` instance, or ``None`` when loading the model
        or initialising the runtime reports a non-zero status. After a
        failure the next call retries from scratch.
    """
    global _rknn_instance
    if _rknn_instance is not None:
        return _rknn_instance

    # Work on a local object so the global is only set once fully initialised.
    rknn = RKNNLite(verbose=False)
    ret = rknn.load_rknn(model_path)
    if ret != 0:
        print(f"[ERROR] Failed to load RKNN: {ret}")
        return None
    ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
    if ret != 0:
        print(f"[ERROR] Failed to init RKNN runtime: {ret}")
        return None

    _rknn_instance = rknn
    return _rknn_instance


def release_rknn():
    """Release the shared RKNN instance, if one has been created."""
    global _rknn_instance
    if _rknn_instance is not None:
        _rknn_instance.release()
        _rknn_instance = None


# ====================== Utility functions ======================

def letterbox_resize(image, size, bg_color=114):
    """Resize ``image`` to fit ``size`` keeping aspect ratio, padding the rest.

    Args:
        image: Image array; only ``image.shape[:2]`` (height, width) is read
            here and the result is pasted onto a 3-channel canvas.
        size: Target ``(width, height)``.
        bg_color: Fill value for the padded border.

    Returns:
        ``(canvas, scale, dx, dy)`` where ``scale`` is the resize factor and
        ``dx`` / ``dy`` are the left / top padding in pixels.
    """
    target_w, target_h = size
    h, w = image.shape[:2]
    scale = min(target_w / w, target_h / h)
    # Clamp to 1 px: extreme aspect ratios would otherwise truncate to 0 and
    # make cv2.resize raise.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    resized = cv2.resize(image, (new_w, new_h))
    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
    dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2
    canvas[dy:dy + new_h, dx:dx + new_w] = resized
    return canvas, scale, dx, dy


def safe_sigmoid(x):
    """Return the logistic sigmoid of ``x``, clipping to [-50, 50] first."""
    x = np.clip(x, -50, 50)
    return 1.0 / (1.0 + np.exp(-x))


def softmax(x):
    """Return the softmax over all elements of ``x`` (max-shifted for stability)."""
    x = x - np.max(x)
    e = np.exp(x)
    return e / e.sum()


def dfl_to_xywh(loc, grid_x, grid_y, stride):
    """Decode 4 x 16 distribution logits of one grid cell into a box.

    Args:
        loc: At least 64 values; consecutive groups of 16 are the logits for
            the left, top, right and bottom distances (in grid units).
        grid_x: Column index of the cell.
        grid_y: Row index of the cell.
        stride: Pixels per grid cell.

    Returns:
        ``np.ndarray`` ``[x, y, w, h]`` in pixels, with ``(x, y)`` the
        top-left corner of the box.
    """
    logits = np.asarray(loc)[:64].reshape(4, 16)
    logits = logits - logits.max(axis=1, keepdims=True)
    probs = np.exp(logits)
    probs = probs / probs.sum(axis=1, keepdims=True)
    # Expected bin index of each side's distribution.
    left, top, right, bottom = probs @ np.arange(16)

    x1 = (grid_x + 0.5) - left
    y1 = (grid_y + 0.5) - top
    x2 = (grid_x + 0.5) + right
    y2 = (grid_y + 0.5) + bottom
    return np.array([
        x1 * stride,
        y1 * stride,
        (x2 - x1) * stride,
        (y2 - y1) * stride,
    ])


def parse_pose_outputs(outputs, conf_threshold=0.5, dx=0, dy=0, scale=1.0,
                       input_size=IMG_SIZE[1]):
    """Pick the best detection and one keypoint set from the model outputs.

    Args:
        outputs: Sequence of four arrays. ``outputs[0:3]`` are detection heads
            indexed as ``[0][channel, row, col]`` with 64 box-distribution
            channels followed by one channel per class. ``outputs[3]`` is
            indexed as ``[0][keypoint, (x, y, visibility logit), anchor]``.
        conf_threshold: Minimum class score for a cell to count as a detection.
        dx: Left letterbox padding to remove.
        dy: Top letterbox padding to remove.
        scale: Letterbox resize factor to undo.
        input_size: Model input height used to derive each head's stride.

    Returns:
        ``(keypoints, class_id, obj_conf, bbox)``. ``keypoints`` has one
        ``[x, y, visibility]`` row per keypoint in original-image pixels.
        ``bbox`` is ``[x, y, w, h]`` (top-left based) in original-image
        pixels. When no cell reaches ``conf_threshold``, ``class_id`` is -1,
        ``obj_conf`` is 0.0 and ``bbox`` is all zeros.
    """
    found = False
    best_box = np.zeros(4)
    class_id = -1
    obj_conf = 0.0

    for head in outputs[:3]:
        det = np.asarray(head)[0]
        num_channels, grid_h, _ = det.shape
        if num_channels <= 64:
            continue  # no class channels in this head
        stride = input_size // grid_h
        # Reorder to (row, col, class) so the flat argmax breaks ties in the
        # same row -> column -> class scan order as a nested loop would.
        scores = safe_sigmoid(det[64:]).transpose(1, 2, 0)
        row, col, cls = np.unravel_index(int(np.argmax(scores)), scores.shape)
        conf = scores[row, col, cls]
        # Strict '>' keeps the earliest head on ties.
        if conf >= conf_threshold and (not found or conf > obj_conf):
            loc = det[:64, row, col].astype(np.float32)
            best_box = dfl_to_xywh(loc, col, row, stride)
            class_id = int(cls)
            obj_conf = conf
            found = True

    if found:
        # Undo the letterbox only for a real box; the "no detection"
        # placeholder must stay all-zero.
        x, y, w, h = best_box
        best_box = np.array([(x - dx) / scale, (y - dy) / scale,
                             w / scale, h / scale])

    kpt_output = np.asarray(outputs[3])[0]
    # NOTE(review): the anchor is chosen by highest mean visibility logit,
    # independently of the box selected above - confirm this is intended.
    best_anchor_idx = np.argmax(np.mean(kpt_output[:, 2, :], axis=0))
    kpt_data = kpt_output[:, :, best_anchor_idx].astype(np.float64)
    keypoints = np.column_stack((
        (kpt_data[:, 0] - dx) / scale,
        (kpt_data[:, 1] - dy) / scale,
        safe_sigmoid(kpt_data[:, 2]),
    ))
    return keypoints, class_id, obj_conf, best_box


def compute_offset(keypoints, fixed_point, scale_x, scale_y):
    """Return the midpoint of the first two keypoints and its offset in mm.

    Args:
        keypoints: Sequence of ``[x, y, ...]`` rows in pixels.
        fixed_point: Reference ``(x, y)`` in pixels.
        scale_x: Millimetres per pixel along x.
        scale_y: Millimetres per pixel along y.

    Returns:
        ``(cx, cy, dx_mm, dy_mm)``, or ``None`` when fewer than two keypoints
        are given.
    """
    if len(keypoints) < 2:
        return None
    p1, p2 = keypoints[0], keypoints[1]
    cx = (p1[0] + p2[0]) / 2.0
    cy = (p1[1] + p2[1]) / 2.0
    dx_mm = (cx - fixed_point[0]) * scale_x
    dy_mm = (cy - fixed_point[1]) * scale_y
    return cx, cy, dx_mm, dy_mm


def visualize_result(image, keypoints, bbox, fixed_point, offset_info, save_path):
    """Draw keypoints, box, reference point and offset text, then save the image.

    Args:
        image: Source image; a copy is drawn on, the input is not modified.
        keypoints: Rows of ``[x, y, confidence]``; only the first two are drawn.
        bbox: ``[x, y, w, h]`` with ``(x, y)`` the top-left corner.
        fixed_point: Reference ``(x, y)`` in pixels.
        offset_info: ``(cx, cy, dx_mm, dy_mm)`` as returned by ``compute_offset``.
        save_path: Output image path.
    """
    vis = image.copy()
    colors = [(0, 0, 255), (0, 255, 255)]
    cx, cy, dx_mm, dy_mm = offset_info
    fx, fy = map(int, fixed_point)

    for i, (x, y, conf) in enumerate(keypoints[:2]):
        if conf > 0.5:
            cv2.circle(vis, (int(x), int(y)), 6, colors[i], -1)
    if len(keypoints) >= 2:
        cv2.line(vis, (int(keypoints[0][0]), int(keypoints[0][1])),
                 (int(keypoints[1][0]), int(keypoints[1][1])), (0, 255, 0), 2)

    x, y, w, h = bbox
    cv2.rectangle(vis, (int(x), int(y)), (int(x + w), int(y + h)), (255, 0, 0), 2)
    cv2.circle(vis, (int(cx), int(cy)), 10, (0, 255, 0), 3)
    cv2.circle(vis, (fx, fy), 12, (255, 255, 0), 3)
    cv2.arrowedLine(vis, (fx, fy), (int(cx), int(cy)), (255, 255, 0), 2, tipLength=0.05)
    cv2.putText(vis, f"DeltaX={dx_mm:+.1f}mm", (fx + 30, fy - 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
    cv2.putText(vis, f"DeltaY={dy_mm:+.1f}mm", (fx + 30, fy + 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(save_path, vis):
        print(f"[ERROR] Failed to save visualization: {save_path}")


# ====================== Main function ======================

def calculate_offset_from_image(image_path, visualize=False):
    """Run the model on one image and report the keypoint-centre offset.

    Args:
        image_path: Path of the image to process.
        visualize: When true, save an annotated copy under ``OUTPUT_DIR``.

    Returns:
        A dict with ``'success'`` and ``'message'``. On success it also holds
        ``'dx_mm'``, ``'dy_mm'``, ``'cx'``, ``'cy'``, ``'class_id'``,
        ``'obj_conf'`` and ``'bbox'``. Success does not imply a detection:
        check ``class_id != -1`` when the box matters.
    """
    orig = cv2.imread(image_path)
    if orig is None:
        return {'success': False, 'message': f'Failed to load image: {image_path}'}

    img_resized, scale, dx, dy = letterbox_resize(orig, IMG_SIZE)
    # Reverse the channel order and add a leading batch axis.
    infer_img = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8)

    rknn = init_rknn_once(MODEL_PATH)
    if rknn is None:
        return {'success': False, 'message': 'Failed to init RKNN'}

    try:
        outputs = rknn.inference([infer_img])
    except Exception as e:
        return {'success': False, 'message': f'RKNN inference error: {e}'}

    try:
        keypoints, class_id, obj_conf, bbox = parse_pose_outputs(
            outputs, dx=dx, dy=dy, scale=scale)
    except Exception as e:
        return {'success': False, 'message': f'Parse error: {e}'}

    offset_info = compute_offset(keypoints, FIXED_REF_POINT, SCALE_X, SCALE_Y)
    if offset_info is None:
        return {'success': False, 'message': 'Not enough keypoints'}

    cx, cy, dx_mm, dy_mm = offset_info

    if visualize:
        vis_save_path = os.path.join(OUTPUT_DIR, f"result_{os.path.basename(image_path)}")
        visualize_result(orig, keypoints, bbox, FIXED_REF_POINT, offset_info, vis_save_path)

    return {'success': True, 'dx_mm': dx_mm, 'dy_mm': dy_mm,
            'cx': cx, 'cy': cy, 'class_id': class_id,
            'obj_conf': obj_conf, 'bbox': bbox,
            'message': 'Success'}


# ====================== Example usage ======================
if __name__ == "__main__":
    image_path = "11.jpg"
    result = calculate_offset_from_image(image_path, visualize=False)
    if result['success']:
        print(f"Center point: ({result['cx']:.1f}, {result['cy']:.1f})")
        print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm")
        print(f"Class ID: {result['class_id']}, Confidence: {result['obj_conf']:.3f}")
        print(f"BBox: {result['bbox']}")
    else:
        print("Error:", result['message'])