diff --git a/cls_main/cls_rk3568.rknn b/cls_main/cls_rk3568.rknn index 6ace18f..05a654c 100644 Binary files a/cls_main/cls_rk3568.rknn and b/cls_main/cls_rk3568.rknn differ diff --git a/cls_main/cls_rk3588.rknn b/cls_main/cls_rk3588.rknn new file mode 100644 index 0000000..83bc182 Binary files /dev/null and b/cls_main/cls_rk3588.rknn differ diff --git a/cls_main/main_cls.py b/cls_main/main_cls.py new file mode 100644 index 0000000..f5c66f6 --- /dev/null +++ b/cls_main/main_cls.py @@ -0,0 +1,99 @@ +import cv2 +import numpy as np +import platform +from rknnlite.api import RKNNLite + +# ------------------- 全局变量 ------------------- +_global_rknn_instance = None +labels = {0: '夹具夹紧', 1: '夹具打开'} + +# ROI: x, y, w, h +ROI = (818, 175, 1381, 1271) # 示例 + +DEVICE_COMPATIBLE_NODE = '/proc/device-tree/compatible' + + +# ------------------- 主机信息 ------------------- +def get_host(): + system = platform.system() + machine = platform.machine() + os_machine = system + '-' + machine + if os_machine == 'Linux-aarch64': + try: + with open(DEVICE_COMPATIBLE_NODE) as f: + device_compatible_str = f.read() + if 'rk3562' in device_compatible_str: + host = 'RK3562' + elif 'rk3576' in device_compatible_str: + host = 'RK3576' + elif 'rk3588' in device_compatible_str: + host = 'RK3588' + else: + host = 'RK3566_RK3568' + except IOError: + print('Read device node {} failed.'.format(DEVICE_COMPATIBLE_NODE)) + exit(-1) + else: + host = os_machine + return host + + +# ------------------- RKNN 模型初始化(只加载一次) ------------------- +def init_rknn_model(model_path): + global _global_rknn_instance + if _global_rknn_instance is None: + rknn_lite = RKNNLite(verbose=False) + ret = rknn_lite.load_rknn(model_path) + if ret != 0: + raise RuntimeError(f'Load model failed: {ret}') + ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_0) + if ret != 0: + raise RuntimeError(f'Init runtime failed: {ret}') + _global_rknn_instance = rknn_lite + print(f'[INFO] RKNN model loaded: {model_path}') + return 
_global_rknn_instance + + +# ------------------- 图像预处理 + ROI 裁剪 ------------------- +def preprocess(raw_image, target_size=(640, 640)): + """ + ROI 裁剪 + resize + batch 维度 + """ + global ROI + x, y, w, h = ROI + roi_img = raw_image[y:y+h, x:x+w] + img_resized = cv2.resize(roi_img, target_size) + img_batch = np.expand_dims(img_resized, 0) # 添加 batch 维度 + return img_batch + + +# ------------------- 推理函数 ------------------- +def yolov11_cls_inference_once(rknn, raw_image, target_size=(640, 640)): + """ + 使用已加载的 rknn 实例进行推理 + 返回: (class_id, boolean) + """ + img = preprocess(raw_image, target_size) + outputs = rknn.inference([img]) + output = outputs[0].reshape(-1) + class_id = int(np.argmax(output)) + bool_value = class_id == 1 + return class_id, bool_value + + +# ------------------- 测试 ------------------- +if __name__ == '__main__': + image_path = "./test_image/class1/2.jpg" + model_path = "cls_rk3588.rknn" + + bgr_image = cv2.imread(image_path) + if bgr_image is None: + raise RuntimeError(f"Failed to read image: {image_path}") + rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB) + + # 只初始化一次模型 + rknn_model = init_rknn_model(model_path) + + # 多次调用都用同一个 rknn_model + class_id, bool_value = yolov11_cls_inference_once(rknn_model, rgb_image) + print(f"类别ID: {class_id}, 布尔值: {bool_value}") diff --git a/cls_main/readme.md b/cls_main/readme.md new file mode 100644 index 0000000..adbd843 --- /dev/null +++ b/cls_main/readme.md @@ -0,0 +1,58 @@ +# yolov11_cls_inference README + +## 概述 +该模块用于对米厂输入图像执行二分类推理,用于判断机械臂夹爪是否夹紧。 + +类别定义: + +0 → 夹具夹紧 (False) +1 → 夹具打开 (True) + +rknn模型只加载一次,复用全局实例,提高推理效率。 + +## 调用示例 + +您可以直接调用 init_rknn_model 和 yolov11_cls_inference_once 函数,以便集成到其他项目中: + +示例 1: 单张图片推理 + +```python +from main_cls import init_rknn_model, yolov11_cls_inference_once +import cv2 + +# 读取图像 +bgr_image = cv2.imread("11.jpg") +rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB) + +# 调用推理函数 +class_id, bool_value = yolov11_cls_inference_once( + init_rknn_model("cls_rk3568.rknn"), + raw_image=rgb_image, + target_size=(640, 640) +) + 
+print(f"类别ID: {class_id}, 布尔值: {bool_value}") + +``` + +示例 2: 多次推理(复用模型) +```python + +from main_cls import init_rknn_model, yolov11_cls_inference_once +import cv2 + +# 初始化一次模型 +rknn_model = init_rknn_model("cls_rk3568.rknn") + +# 读取图像 +bgr_image = cv2.imread("12.jpg") +rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB) + +# 使用已加载模型进行推理 +class_id, bool_value = yolov11_cls_inference_once(rknn_model, rgb_image) + +if bool_value: + print("夹具打开") +else: + print("夹具夹紧") +``` diff --git a/cls_main/test_image/class0/1.jpg b/cls_main/test_image/class0/1.jpg new file mode 100644 index 0000000..1fdf5ba Binary files /dev/null and b/cls_main/test_image/class0/1.jpg differ diff --git a/cls_main/test_image/class1/1.jpg b/cls_main/test_image/class1/1.jpg new file mode 100644 index 0000000..34d51dc Binary files /dev/null and b/cls_main/test_image/class1/1.jpg differ diff --git a/cls_main/test_image/class1/2.jpg b/cls_main/test_image/class1/2.jpg new file mode 100644 index 0000000..9b92198 Binary files /dev/null and b/cls_main/test_image/class1/2.jpg differ diff --git a/point_diff_main/README.md b/point_diff_main/README.md new file mode 100644 index 0000000..24dd210 --- /dev/null +++ b/point_diff_main/README.md @@ -0,0 +1,94 @@ +# RKNN 关键点推理与偏移量计算工具 + +该工具通过使用RKNN模型对输入图像进行关键点检测,并根据检测结果计算相对于固定参考点的偏移量(单位:毫米)。此外,还提供了可视化选项来展示计算结果。 + +## 目录结构 + +├── calculate_diff.py # 主程序脚本 +├── point.rknn # RKNN 模型文件 (请确保正确路径) +└── README.md # 说明文档 + +## 配置 + +在 `calculate_diff.py` 文件顶部的配置区中,您可以修改如下参数以适应您的需求: + +- **MODEL_PATH**: RKNN 模型文件路径。 +- **OUTPUT_DIR**: 输出目录路径。 +- **FIXED_REF_POINT**: 固定参考点坐标(像素)。 +- **SCALE_X**, **SCALE_Y**: 缩放因子,用于将像素坐标转换为毫米。 +- **IMG_SIZE**: 输入图像尺寸。 + +## 安装依赖 + +请确保安装了必要的 Python 库。可以通过 pip 安装: + +```bash +pip install opencv-python numpy rknnlite +``` + +## 函数调用1.0 + +您也可以直接调用 calculate_offset_from_image 函数,以便集成到其他项目中: +示例 1: 仅获取偏移量(不画图) + +```python +from calculate_diff import calculate_offset_from_image +result = calculate_offset_from_image("your_image_path.jpg", 
visualize=False) +if result['success']: + print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm") +else: + print("Error:", result['message']) +``` +示例 2: 获取偏移量并保存可视化图 + +```python +from calculate_diff import calculate_offset_from_image +result = calculate_offset_from_image("your_image_path.jpg", visualize=True) +``` + +## 函数调用2.0 + +示例 1: 仅获取偏移量(不画图) + +```python +from calculate_diff2.0 import calculate_offset_from_image + +result = calculate_offset_from_image("11.jpg", visualize=False) +if result['success']: + print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm") +else: + print("Error:", result['message']) + +``` +示例 2: 获取偏移量并保存可视化图 + +```python +from calculate_diff2.0 import calculate_offset_from_image + +result = calculate_offset_from_image("11.jpg", visualize=True) + +``` + +## 该函数返回一个包含下列字段的字典1.0: + + success: 成功标志(True/False) + dx_mm: 水平偏移(毫米) + dy_mm: 垂直偏移(毫米) + cx: 中心点 x 坐标(像素) + cy: 中心点 y 坐标(像素) + message: 错误信息或成功提示 + +## 该函数返回一个包含下列字段的字典2.0: + + success: 成功标志(True/False) + dx_mm: 水平偏移(毫米) + dy_mm: 垂直偏移(毫米) + cx: 中心点 x 坐标(像素) + cy: 中心点 y 坐标(像素) + message: 错误信息或成功提示 + class_id: 检测类别 ID #这里是bag的id是0 + obj_conf: 检测置信度 #这就是识别为料袋的置信度 + bbox: 检测矩形框 [x_left, y_top, width, height] + message: 错误信息或成功提示 + + diff --git a/point_diff_main/caculate_diff(可用设备树版本,测试,不用做推理).py b/point_diff_main/caculate_diff(可用设备树版本,测试,不用做推理).py new file mode 100644 index 0000000..7e37a3c --- /dev/null +++ b/point_diff_main/caculate_diff(可用设备树版本,测试,不用做推理).py @@ -0,0 +1,256 @@ +# detect_fixed.py +import cv2 +import numpy as np +import os +import platform +from rknnlite.api import RKNNLite + +# ====================== 配置区 ====================== + +IMAGE_PATH = "11.jpg" # 测试图片 +MODEL_PATH = "point.rknn" +OUTPUT_DIR = "./output_rknn" +os.makedirs(OUTPUT_DIR, exist_ok=True) + +# 固定参考点 & 缩放因子 +FIXED_REF_POINT = (535, 605) +width_mm, width_px = 70.0, 42 +height_mm, height_px = 890.0, 507 +SCALE_X = width_mm / float(width_px) +SCALE_Y = 
height_mm / float(height_px) +print(f"[INFO] Scale factors: X={SCALE_X:.3f} mm/px, Y={SCALE_Y:.3f} mm/px") + +IMG_SIZE = (640, 640) + +# 设备树路径(用于自动识别平台) +DEVICE_COMPATIBLE_NODE = '/proc/device-tree/compatible' + + +def get_host(): + system = platform.system() + machine = platform.machine() + if system == 'Linux' and machine == 'aarch64': + try: + with open(DEVICE_COMPATIBLE_NODE, 'r') as f: + compatible = f.read() + if 'rk3588' in compatible: + return 'RK3588' + elif 'rk3576' in compatible: + return 'RK3576' + elif 'rk3562' in compatible: + return 'RK3562' + else: + return 'RK3566_RK3568' + except Exception as e: + print(f"Read device tree failed: {e}") + exit(-1) + else: + return f"{system}-{machine}" + + +def letterbox_resize(image, size, bg_color=114): + target_w, target_h = size + h, w = image.shape[:2] + scale = min(target_w / w, target_h / h) + new_w, new_h = int(w * scale), int(h * scale) + resized = cv2.resize(image, (new_w, new_h)) + canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8) + dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2 + canvas[dy:dy + new_h, dx:dx + new_w] = resized + return canvas, scale, dx, dy + + +def safe_sigmoid(x): + x = np.clip(x, -50, 50) + return 1.0 / (1.0 + np.exp(-x)) + + +def softmax(x): + x = x - np.max(x) + e = np.exp(x) + return e / e.sum() + + +def dfl_to_xywh(loc, grid_x, grid_y, stride): + xywh_ = np.zeros(4) + xywh = np.zeros(4) + for i in range(4): + l = loc[i * 16:(i + 1) * 16] + l = softmax(l) + xywh_[i] = sum(j * l[j] for j in range(16)) + + xywh_[0] = (grid_x + 0.5) - xywh_[0] + xywh_[1] = (grid_y + 0.5) - xywh_[1] + xywh_[2] = (grid_x + 0.5) + xywh_[2] + xywh_[3] = (grid_y + 0.5) + xywh_[3] + + xywh[0] = ((xywh_[0] + xywh_[2]) / 2) * stride + xywh[1] = ((xywh_[1] + xywh_[3]) / 2) * stride + xywh[2] = (xywh_[2] - xywh_[0]) * stride + xywh[3] = (xywh_[3] - xywh_[1]) * stride + + xywh[0] -= xywh[2] / 2 + xywh[1] -= xywh[3] / 2 + return xywh + + +def parse_pose_outputs(outputs, 
conf_threshold=0.5, dx=0, dy=0, scale=1.0): + boxes = [] + obj_confs = [] + class_ids = [] + + for idx in range(3): # det head + det = np.array(outputs[idx])[0] + C, H, W = det.shape + num_classes = C - 64 + stride = 640 // H + + for h in range(H): + for w in range(W): + for c in range(num_classes): + conf = safe_sigmoid(det[64 + c, h, w]) + if conf >= conf_threshold: + loc = det[:64, h, w].astype(np.float32) + xywh = dfl_to_xywh(loc, w, h, stride) + boxes.append(xywh) + obj_confs.append(conf) + class_ids.append(c) + + if not obj_confs: + best_box = np.array([0, 0, 0, 0]) + class_id = -1 + obj_conf = 0.0 + else: + max_idx = np.argmax(obj_confs) + best_box = boxes[max_idx] + class_id = class_ids[max_idx] + obj_conf = obj_confs[max_idx] + + x, y, w, h = best_box + x = (x - dx) / scale + y = (y - dy) / scale + w /= scale + h /= scale + best_box = [x, y, w, h] + + kpt_output = np.array(outputs[3])[0] + confs = kpt_output[:, 2, :] + best_anchor_idx = np.argmax(np.mean(confs, axis=0)) + kpt_data = kpt_output[:, :, best_anchor_idx] + + keypoints = [] + for i in range(kpt_data.shape[0]): + x_img, y_img, vis_raw = kpt_data[i] + vis_prob = safe_sigmoid(vis_raw) + x_orig = (x_img - dx) / scale + y_orig = (y_img - dy) / scale + keypoints.append([x_orig, y_orig, vis_prob]) + + return np.array(keypoints), class_id, obj_conf, best_box + + +def compute_offset(keypoints, fixed_point, scale_x, scale_y): + if len(keypoints) < 2: return None + p1, p2 = keypoints[0], keypoints[1] + cx = (p1[0] + p2[0]) / 2 + cy = (p1[1] + p2[1]) / 2 + dx_mm = (cx - fixed_point[0]) * scale_x + dy_mm = (cy - fixed_point[1]) * scale_y + return cx, cy, dx_mm, dy_mm + + +def visualize_result(image, keypoints, bbox, fixed_point, offset_info, save_path): + vis = image.copy() + fx, fy = map(int, fixed_point) + cx, cy, dx_mm, dy_mm = offset_info + + for i, (x, y, conf) in enumerate(keypoints[:2]): + if conf > 0.5: + color = (0, 0, 255) if i == 0 else (0, 255, 255) + cv2.circle(vis, (int(x), int(y)), 6, color, 
-1) + if len(keypoints) >= 2: + cv2.line(vis, (int(keypoints[0][0]), int(keypoints[0][1])), + (int(keypoints[1][0]), int(keypoints[1][1])), + (0, 255, 0), 2) + + x, y, w, h = bbox + cv2.rectangle(vis, (int(x), int(y)), (int(x + w), int(y + h)), (255, 0, 0), 2) + cv2.circle(vis, (int(cx), int(cy)), 10, (0, 255, 0), 3) + cv2.circle(vis, (fx, fy), 12, (255, 255, 0), 3) + cv2.arrowedLine(vis, (fx, fy), (int(cx), int(cy)), (255, 255, 0), 2, tipLength=0.05) + cv2.putText(vis, f"DeltaX={dx_mm:+.1f}mm", (fx + 30, fy - 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2) + cv2.putText(vis, f"DeltaY={dy_mm:+.1f}mm", (fx + 30, fy + 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2) + cv2.imwrite(save_path, vis) + + +def main(): + host_name = get_host() + print(f"[INFO] Running on: {host_name}") + + rknn = RKNNLite() + + # ---- Load Model ---- + ret = rknn.load_rknn(MODEL_PATH) + if ret != 0: + print("❌ Failed to load RKNN model!") + exit(ret) + print("✅ Model loaded successfully.") + + # ---- Init Runtime ---- + if host_name in ['RK3576', 'RK3588']: + ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0) + else: + ret = rknn.init_runtime() + if ret != 0: + print("❌ Init runtime failed!") + rknn.release() + exit(ret) + print("✅ Runtime initialized.") + + # ---- Preprocess ---- + ori_img = cv2.imread(IMAGE_PATH) + if ori_img is None: + print(f"❌ Cannot read image: {IMAGE_PATH}") + return + + img_resized, scale, dx, dy = letterbox_resize(ori_img, IMG_SIZE) + input_tensor = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8) # RGB + + # ---- Inference ---- + print("🔍 Starting inference...") + outputs = rknn.inference(inputs=[input_tensor]) + print("✅ Inference completed.") + + # ---- Post-process ---- + try: + keypoints, cls_id, obj_conf, bbox = parse_pose_outputs( + outputs, dx=dx, dy=dy, scale=scale) + + offset_info = compute_offset(keypoints, FIXED_REF_POINT, SCALE_X, SCALE_Y) + if offset_info is None: + print("⚠️ Not enough keypoints detected.") + return + + 
cx, cy, dx_mm, dy_mm = offset_info + vis_save_path = os.path.join(OUTPUT_DIR, f"result_{os.path.basename(IMAGE_PATH)}") + visualize_result(ori_img, keypoints, bbox, FIXED_REF_POINT, offset_info, vis_save_path) + + print(f"\n🎯 Detection Result:") + print(f"Center: ({cx:.1f}, {cy:.1f})") + print(f"Offset: ΔX={dx_mm:+.2f}mm, ΔY={dy_mm:+.2f}mm") + print(f"Class: {cls_id}, Confidence: {obj_conf:.3f}") + print(f"Saved result to: {vis_save_path}") + + except Exception as e: + print(f"❌ Post-processing error: {e}") + import traceback + traceback.print_exc() + + finally: + rknn.release() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/point_diff_main/calculate_diff.py b/point_diff_main/calculate_diff.py new file mode 100644 index 0000000..98ff4bd --- /dev/null +++ b/point_diff_main/calculate_diff.py @@ -0,0 +1,230 @@ +import cv2 +import numpy as np +import os +from rknnlite.api import RKNNLite + +# ====================== 配置区 ====================== +MODEL_PATH = "point.rknn" +OUTPUT_DIR = "./output_rknn" +os.makedirs(OUTPUT_DIR, exist_ok=True) + +# 固定参考点(像素坐标) +FIXED_REF_POINT = (535, 605) + +# mm/px 缩放因子(根据标定数据填写) +width_mm = 70.0 +width_px = 42 +SCALE_X = width_mm / float(width_px) +height_mm = 890.0 +height_px = 507 +SCALE_Y = height_mm / float(height_px) +print(f"Scale factors: SCALE_X={SCALE_X:.3f} mm/px, SCALE_Y={SCALE_Y:.3f} mm/px") + +# 输入尺寸 +IMG_SIZE = (640, 640) + + +def letterbox_resize(image, size, bg_color=114): + """保持比例缩放并填充到指定大小""" + target_w, target_h = size + h, w = image.shape[:2] + scale = min(target_w / w, target_h / h) + new_w, new_h = int(w * scale), int(h * scale) + resized = cv2.resize(image, (new_w, new_h)) + canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8) + dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2 + canvas[dy:dy + new_h, dx:dx + new_w] = resized + return canvas, scale, dx, dy + + +def safe_sigmoid(x): + x = np.clip(x, -50, 50) + return 1.0 / (1.0 + np.exp(-x)) + + +def 
parse_pose_outputs(outputs, dx=0, dy=0, scale=1.0): + """ + 解析 RKNN YOLO-Pose 关键点输出 + outputs[3]: shape (1, 4, 3, 8400) -> [kpt_id, (x,y,conf), anchor] + """ + kpt_output = np.array(outputs[3])[0] # (4, 3, 8400) + confs = kpt_output[:, 2, :] # 取每个关键点的 visible_conf + mean_conf_per_anchor = np.mean(confs, axis=0) # 每个 anchor 的平均可见性 + best_anchor_idx = np.argmax(mean_conf_per_anchor) + kpt_data = kpt_output[:, :, best_anchor_idx] # (4, 3): x, y, vis_conf + + keypoints = [] + for i in range(4): + x_img = kpt_data[i, 0] + y_img = kpt_data[i, 1] + vis_conf_raw = kpt_data[i, 2] + vis_prob = safe_sigmoid(vis_conf_raw) + + # 映射回原图坐标 + x_orig = (x_img - dx) / scale + y_orig = (y_img - dy) / scale + keypoints.append([x_orig, y_orig, vis_prob]) + + return np.array(keypoints) + + +def compute_offset(keypoints, fixed_point, scale_x, scale_y): + """ + 计算中心点相对于固定参考点的偏移量(mm) + 中心点 = P0 和 P1 的中点 + 返回: (center_x, center_y, dx_mm, dy_mm) + """ + if len(keypoints) < 2: + return None + + p1, p2 = keypoints[0], keypoints[1] + cx = (p1[0] + p2[0]) / 2.0 + cy = (p1[1] + p2[1]) / 2.0 + + dx_px = cx - fixed_point[0] + dy_px = cy - fixed_point[1] + dx_mm = dx_px * scale_x + dy_mm = dy_px * scale_y + + return cx, cy, dx_mm, dy_mm + + +def visualize_result(image, keypoints, fixed_point, offset_info, save_path): + """ + 可视化关键点、参考点、中心点、偏移箭头和文字 + """ + vis = image.copy() + colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0)] + cx, cy, dx_mm, dy_mm = offset_info + fx, fy = map(int, fixed_point) + + # 绘制关键点 + for i, (x, y, conf) in enumerate(keypoints): + if conf > 0.5: + cv2.circle(vis, (int(x), int(y)), 8, colors[i], -1) + cv2.putText(vis, f"P{i}", (int(x) + 10, int(y) - 10), + cv2.FONT_HERSHEY_SIMPLEX, 0.8, colors[i], 2) + + # 绘制中心点 + cv2.circle(vis, (int(cx), int(cy)), 12, (0, 255, 0), 3) + cv2.putText(vis, "Center", (int(cx) + 20, int(cy)), + cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3) + + # 绘制参考点 + cv2.circle(vis, (fx, fy), 15, (255, 255, 0), 3) + cv2.putText(vis, "Ref", (fx + 20, 
fy), + cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 3) + + # 绘制偏移箭头和文字 + cv2.arrowedLine(vis, (fx, fy), (int(cx), int(cy)), (0, 255, 255), 3, tipLength=0.05) + cv2.putText(vis, f"DeltaX={dx_mm:+.1f}mm", (fx + 40, fy - 40), + cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 3) + cv2.putText(vis, f"DeltaY={dy_mm:+.1f}mm", (fx + 40, fy + 40), + cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 3) + + cv2.imwrite(save_path, vis) + + +def calculate_offset_from_image(image_path, visualize=False): + """ + 主函数:输入图片路径,输出偏移量 (dx_mm, dy_mm) + + 参数: + image_path (str): 输入图像路径 + visualize (bool): 是否保存可视化结果 + + 返回: + dict: { + 'success': bool, + 'dx_mm': float or None, + 'dy_mm': float or None, + 'cx': float or None, # 中心点 x + 'cy': float or None, # 中心点 y + 'message': str + } + """ + # 读取图像 + orig = cv2.imread(image_path) + if orig is None: + return { + 'success': False, + 'dx_mm': None, 'dy_mm': None, 'cx': None, 'cy': None, + 'message': f'Failed to load image: {image_path}' + } + + h0, w0 = orig.shape[:2] + + # 预处理 + img_resized, scale, dx, dy = letterbox_resize(orig, IMG_SIZE) + infer_img = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8) + + # 加载模型并推理 + rknn = RKNNLite(verbose=False) + ret = rknn.load_rknn(MODEL_PATH) + if ret != 0: + return { + 'success': False, + 'dx_mm': None, 'dy_mm': None, 'cx': None, 'cy': None, + 'message': 'Failed to load RKNN model' + } + + try: + rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0) + outputs = rknn.inference([infer_img]) + except Exception as e: + rknn.release() + return { + 'success': False, + 'dx_mm': None, 'dy_mm': None, 'cx': None, 'cy': None, + 'message': f'Inference error: {str(e)}' + } + finally: + rknn.release() + + # 解析关键点 + try: + keypoints = parse_pose_outputs(outputs, dx=dx, dy=dy, scale=scale) + except Exception as e: + return { + 'success': False, + 'dx_mm': None, 'dy_mm': None, 'cx': None, 'cy': None, + 'message': f'Parse keypoint error: {str(e)}' + } + + # 计算偏移 + offset_info = compute_offset(keypoints, 
FIXED_REF_POINT, SCALE_X, SCALE_Y) + if offset_info is None: + return { + 'success': False, + 'dx_mm': None, 'dy_mm': None, 'cx': None, 'cy': None, + 'message': 'Not enough keypoints to compute offset' + } + + cx, cy, dx_mm, dy_mm = offset_info + + # 可视化(可选) + if visualize: + vis_save_path = os.path.join(OUTPUT_DIR, f"result_{os.path.basename(image_path)}") + visualize_result(orig, keypoints, FIXED_REF_POINT, offset_info, vis_save_path) + + return { + 'success': True, + 'dx_mm': dx_mm, + 'dy_mm': dy_mm, + 'cx': cx, + 'cy': cy, + 'message': 'Success' + } + + +# ====================== 使用示例 ====================== +if __name__ == "__main__": + image_path = "11.jpg" + + result = calculate_offset_from_image(image_path, visualize=True) + + if result['success']: + print(f"Center point: ({result['cx']:.1f}, {result['cy']:.1f})") + print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm") + else: + print("Error:", result['message']) \ No newline at end of file diff --git a/point_diff_main/calculate_diff2.0.py b/point_diff_main/calculate_diff2.0.py new file mode 100644 index 0000000..cb45c7a --- /dev/null +++ b/point_diff_main/calculate_diff2.0.py @@ -0,0 +1,236 @@ +import cv2 +import numpy as np +import os +from rknnlite.api import RKNNLite + +# ====================== 配置区 ====================== + +MODEL_PATH = "point.rknn" +OUTPUT_DIR = "./output_rknn" +os.makedirs(OUTPUT_DIR, exist_ok=True) + +# 固定参考点(像素坐标) +FIXED_REF_POINT = (535, 605) + +# mm/px 缩放因子(根据标定数据填写) +width_mm = 70.0 +width_px = 42 +SCALE_X = width_mm / float(width_px) +height_mm = 890.0 +height_px = 507 +SCALE_Y = height_mm / float(height_px) +print(f"Scale factors: SCALE_X={SCALE_X:.3f} mm/px, SCALE_Y={SCALE_Y:.3f} mm/px") + +# 输入尺寸 +IMG_SIZE = (640, 640) + + +def letterbox_resize(image, size, bg_color=114): + target_w, target_h = size + h, w = image.shape[:2] + scale = min(target_w / w, target_h / h) + new_w, new_h = int(w * scale), int(h * scale) + resized = cv2.resize(image, 
(new_w, new_h)) + canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8) + dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2 + canvas[dy:dy + new_h, dx:dx + new_w] = resized + return canvas, scale, dx, dy + + +def safe_sigmoid(x): + x = np.clip(x, -50, 50) + return 1.0 / (1.0 + np.exp(-x)) + + +def softmax(x): + x = x - np.max(x) + e = np.exp(x) + return e / e.sum() + + +def dfl_to_xywh(loc, grid_x, grid_y, stride): + """将 DFL 输出解析为 xywh""" + xywh_ = np.zeros(4) + xywh = np.zeros(4) + + # 每个维度 16 bins 做 softmax + for i in range(4): + l = loc[i * 16:(i + 1) * 16] + l = softmax(l) + xywh_[i] = sum([j * l[j] for j in range(16)]) + + # 对应公式 + xywh_[0] = (grid_x + 0.5) - xywh_[0] + xywh_[1] = (grid_y + 0.5) - xywh_[1] + xywh_[2] = (grid_x + 0.5) + xywh_[2] + xywh_[3] = (grid_y + 0.5) + xywh_[3] + + # 转成中心点 + 宽高 + xywh[0] = ((xywh_[0] + xywh_[2]) / 2) * stride + xywh[1] = ((xywh_[1] + xywh_[3]) / 2) * stride + xywh[2] = (xywh_[2] - xywh_[0]) * stride + xywh[3] = (xywh_[3] - xywh_[1]) * stride + + # 转为左上角坐标 + xywh[0] = xywh[0] - xywh[2] / 2 + xywh[1] = xywh[1] - xywh[3] / 2 + return xywh + + +def parse_pose_outputs(outputs, conf_threshold=0.5, dx=0, dy=0, scale=1.0): + """ + 完整解析 RKNN YOLO-Pose 输出 + 返回 keypoints, class_id, obj_conf, bbox(已映射回原图) + """ + boxes = [] + obj_confs = [] + class_ids = [] + + # 遍历前三个输出 tensor (det 输出) + for idx in range(3): + det = np.array(outputs[idx])[0] # (C,H,W) + C, H, W = det.shape + num_classes = C - 64 # 前64通道为 DFL bbox + stride = 640 // H + + for h in range(H): + for w in range(W): + for c in range(num_classes): + conf = safe_sigmoid(det[64 + c, h, w]) + if conf >= conf_threshold: + loc = det[:64, h, w].astype(np.float32) + xywh = dfl_to_xywh(loc, w, h, stride) + boxes.append(xywh) + obj_confs.append(conf) + class_ids.append(c) + + if not obj_confs: + best_box = np.array([0, 0, 0, 0]) + class_id = -1 + obj_conf = 0.0 + else: + max_idx = np.argmax(obj_confs) + best_box = boxes[max_idx] + class_id = class_ids[max_idx] 
+ obj_conf = obj_confs[max_idx] + + # 🔹 bbox 坐标映射回原图 + x, y, w, h = best_box + x = (x - dx) / scale + y = (y - dy) / scale + w = w / scale + h = h / scale + best_box = np.array([x, y, w, h]) + + # 🔹 关键点解析 + kpt_output = np.array(outputs[3])[0] # (num_kpts, 3, num_anchor) + confs = kpt_output[:, 2, :] + best_anchor_idx = np.argmax(np.mean(confs, axis=0)) + kpt_data = kpt_output[:, :, best_anchor_idx] + + keypoints = [] + for i in range(kpt_data.shape[0]): + x_img, y_img, vis_conf_raw = kpt_data[i] + vis_prob = safe_sigmoid(vis_conf_raw) + x_orig = (x_img - dx) / scale + y_orig = (y_img - dy) / scale + keypoints.append([x_orig, y_orig, vis_prob]) + + return np.array(keypoints), class_id, obj_conf, best_box + + +def compute_offset(keypoints, fixed_point, scale_x, scale_y): + if len(keypoints) < 2: + return None + p1, p2 = keypoints[0], keypoints[1] + cx = (p1[0] + p2[0]) / 2.0 + cy = (p1[1] + p2[1]) / 2.0 + dx_mm = (cx - fixed_point[0]) * scale_x + dy_mm = (cy - fixed_point[1]) * scale_y + return cx, cy, dx_mm, dy_mm + + +def visualize_result(image, keypoints, bbox, fixed_point, offset_info, save_path): + vis = image.copy() + colors = [(0, 0, 255), (0, 255, 255)] + cx, cy, dx_mm, dy_mm = offset_info + fx, fy = map(int, fixed_point) + + # 绘制关键点 + for i, (x, y, conf) in enumerate(keypoints[:2]): + if conf > 0.5: + cv2.circle(vis, (int(x), int(y)), 6, colors[i], -1) + if len(keypoints) >= 2: + cv2.line(vis, + (int(keypoints[0][0]), int(keypoints[0][1])), + (int(keypoints[1][0]), int(keypoints[1][1])), + (0, 255, 0), 2) + + # 绘制 bbox + x, y, w, h = bbox + cv2.rectangle(vis, (int(x), int(y)), (int(x + w), int(y + h)), (255, 0, 0), 2) + + # 绘制中心点 + cv2.circle(vis, (int(cx), int(cy)), 10, (0, 255, 0), 3) + cv2.circle(vis, (fx, fy), 12, (255, 255, 0), 3) + cv2.arrowedLine(vis, (fx, fy), (int(cx), int(cy)), (255, 255, 0), 2, tipLength=0.05) + cv2.putText(vis, f"DeltaX={dx_mm:+.1f}mm", (fx + 30, fy - 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2) + cv2.putText(vis, 
f"DeltaY={dy_mm:+.1f}mm", (fx + 30, fy + 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2) + + cv2.imwrite(save_path, vis) + + +def calculate_offset_from_image(image_path, visualize=False): + orig = cv2.imread(image_path) + if orig is None: + return {'success': False, 'message': f'Failed to load image: {image_path}'} + + img_resized, scale, dx, dy = letterbox_resize(orig, IMG_SIZE) + infer_img = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8) + + rknn = RKNNLite(verbose=False) + ret = rknn.load_rknn(MODEL_PATH) + if ret != 0: + return {'success': False, 'message': 'Failed to load RKNN model'} + + try: + rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0) + outputs = rknn.inference([infer_img]) + finally: + rknn.release() + + try: + keypoints, class_id, obj_conf, bbox = parse_pose_outputs(outputs, dx=dx, dy=dy, scale=scale) + except Exception as e: + return {'success': False, 'message': f'Parse error: {str(e)}'} + + offset_info = compute_offset(keypoints, FIXED_REF_POINT, SCALE_X, SCALE_Y) + if offset_info is None: + return {'success': False, 'message': 'Not enough keypoints'} + + cx, cy, dx_mm, dy_mm = offset_info + + if visualize: + vis_save_path = os.path.join(OUTPUT_DIR, f"result_{os.path.basename(image_path)}") + visualize_result(orig, keypoints, bbox, FIXED_REF_POINT, offset_info, vis_save_path) + + return {'success': True, 'dx_mm': dx_mm, 'dy_mm': dy_mm, + 'cx': cx, 'cy': cy, 'class_id': class_id, + 'obj_conf': obj_conf, 'bbox': bbox, + 'message': 'Success'} + + +# ====================== 使用示例 ====================== +if __name__ == "__main__": + image_path = "11.jpg" + result = calculate_offset_from_image(image_path, visualize=True) + + if result['success']: + print(f"Center point: ({result['cx']:.1f}, {result['cy']:.1f})") + print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm") + print(f"Class ID: {result['class_id']}, Confidence: {result['obj_conf']:.3f}") + print(f"BBox: {result['bbox']}") + else: + 
print("Error:", result['message']) diff --git a/point_diff_main/point.rknn b/point_diff_main/point.rknn new file mode 100644 index 0000000..64d89cc Binary files /dev/null and b/point_diff_main/point.rknn differ