bgg35-50

2026-01-16 15:21:54 +08:00
parent 69361c5d5b
commit 1fd14cf7d8
41 changed files with 1133 additions and 26131 deletions
--- a/Mv3D/calculate_diff2.py
+++ b/Mv3D/calculate_diff2.py
@ -1,6 +1,7 @@
 import cv2
 import numpy as np
 import os
+#from rknnlite.api import RKNNLite

 # ====================== 配置区 ======================

@ -23,6 +24,34 @@ print(f"Scale factors: SCALE_X={SCALE_X:.3f} mm/px, SCALE_Y={SCALE_Y:.3f} mm/px"
 # 输入尺寸
 IMG_SIZE = (640, 640)

+# ====================== RKNN 单例管理 ======================
+_rknn_instance = None
+
+def init_rknn_once(model_path):
+    """只加载一次 RKNN 模型"""
+    global _rknn_instance
+    if _rknn_instance is None:
+        _rknn_instance = RKNNLite(verbose=False)
+        ret = _rknn_instance.load_rknn(model_path)
+        if ret != 0:
+            print(f"[ERROR] Failed to load RKNN: {ret}")
+            _rknn_instance = None
+            return None
+        ret = _rknn_instance.init_runtime()
+        if ret != 0:
+            print(f"[ERROR] Failed to init RKNN runtime: {ret}")
+            _rknn_instance = None
+            return None
+    return _rknn_instance
+
+def release_rknn():
+    """释放 RKNN 单例"""
+    global _rknn_instance
+    if _rknn_instance:
+        _rknn_instance.release()
+        _rknn_instance = None
+
+# ====================== 工具函数 ======================

 def letterbox_resize(image, size, bg_color=114):
    target_w, target_h = size
@ -35,63 +64,43 @@ def letterbox_resize(image, size, bg_color=114):
    canvas[dy:dy + new_h, dx:dx + new_w] = resized
    return canvas, scale, dx, dy

-
 def safe_sigmoid(x):
    x = np.clip(x, -50, 50)
    return 1.0 / (1.0 + np.exp(-x))

-
 def softmax(x):
    x = x - np.max(x)
    e = np.exp(x)
    return e / e.sum()

-
 def dfl_to_xywh(loc, grid_x, grid_y, stride):
-    """将 DFL 输出解析为 xywh"""
    xywh_ = np.zeros(4)
    xywh = np.zeros(4)
-
-    # 每个维度 16 bins 做 softmax
    for i in range(4):
        l = loc[i * 16:(i + 1) * 16]
        l = softmax(l)
        xywh_[i] = sum([j * l[j] for j in range(16)])
-
-    # 对应公式
    xywh_[0] = (grid_x + 0.5) - xywh_[0]
    xywh_[1] = (grid_y + 0.5) - xywh_[1]
    xywh_[2] = (grid_x + 0.5) + xywh_[2]
    xywh_[3] = (grid_y + 0.5) + xywh_[3]
-
-    # 转成中心点 + 宽高
    xywh[0] = ((xywh_[0] + xywh_[2]) / 2) * stride
    xywh[1] = ((xywh_[1] + xywh_[3]) / 2) * stride
    xywh[2] = (xywh_[2] - xywh_[0]) * stride
    xywh[3] = (xywh_[3] - xywh_[1]) * stride
-
-    # 转为左上角坐标
    xywh[0] = xywh[0] - xywh[2] / 2
    xywh[1] = xywh[1] - xywh[3] / 2
    return xywh

-
 def parse_pose_outputs(outputs, conf_threshold=0.5, dx=0, dy=0, scale=1.0):
-    """
-    完整解析 RKNN YOLO-Pose 输出
-    返回 keypoints, class_id, obj_conf, bbox（已映射回原图）
-    """
    boxes = []
    obj_confs = []
    class_ids = []
-
-    # 遍历前三个输出 tensor (det 输出)
    for idx in range(3):
-        det = np.array(outputs[idx])[0]  # (C,H,W)
+        det = np.array(outputs[idx])[0]
        C, H, W = det.shape
-        num_classes = C - 64  # 前64通道为 DFL bbox
+        num_classes = C - 64
        stride = 640 // H
-
        for h in range(H):
            for w in range(W):
                for c in range(num_classes):
@ -102,7 +111,6 @@ def parse_pose_outputs(outputs, conf_threshold=0.5, dx=0, dy=0, scale=1.0):
                        boxes.append(xywh)
                        obj_confs.append(conf)
                        class_ids.append(c)
-
    if not obj_confs:
        best_box = np.array([0, 0, 0, 0])
        class_id = -1
@ -112,21 +120,16 @@ def parse_pose_outputs(outputs, conf_threshold=0.5, dx=0, dy=0, scale=1.0):
        best_box = boxes[max_idx]
        class_id = class_ids[max_idx]
        obj_conf = obj_confs[max_idx]
-
-    # 🔹 bbox 坐标映射回原图
    x, y, w, h = best_box
    x = (x - dx) / scale
    y = (y - dy) / scale
    w = w / scale
    h = h / scale
    best_box = np.array([x, y, w, h])
-
-    # 🔹 关键点解析
-    kpt_output = np.array(outputs[3])[0]  # (num_kpts, 3, num_anchor)
+    kpt_output = np.array(outputs[3])[0]
    confs = kpt_output[:, 2, :]
    best_anchor_idx = np.argmax(np.mean(confs, axis=0))
    kpt_data = kpt_output[:, :, best_anchor_idx]
-
    keypoints = []
    for i in range(kpt_data.shape[0]):
        x_img, y_img, vis_conf_raw = kpt_data[i]
@ -134,10 +137,8 @@ def parse_pose_outputs(outputs, conf_threshold=0.5, dx=0, dy=0, scale=1.0):
        x_orig = (x_img - dx) / scale
        y_orig = (y_img - dy) / scale
        keypoints.append([x_orig, y_orig, vis_prob])
-
    return np.array(keypoints), class_id, obj_conf, best_box

-
 def compute_offset(keypoints, fixed_point, scale_x, scale_y):
    if len(keypoints) < 2:
        return None
@ -148,28 +149,19 @@ def compute_offset(keypoints, fixed_point, scale_x, scale_y):
    dy_mm = (cy - fixed_point[1]) * scale_y
    return cx, cy, dx_mm, dy_mm

-
 def visualize_result(image, keypoints, bbox, fixed_point, offset_info, save_path):
    vis = image.copy()
    colors = [(0, 0, 255), (0, 255, 255)]
    cx, cy, dx_mm, dy_mm = offset_info
    fx, fy = map(int, fixed_point)
-
-    # 绘制关键点
    for i, (x, y, conf) in enumerate(keypoints[:2]):
        if conf > 0.5:
            cv2.circle(vis, (int(x), int(y)), 6, colors[i], -1)
    if len(keypoints) >= 2:
-        cv2.line(vis,
-                 (int(keypoints[0][0]), int(keypoints[0][1])),
-                 (int(keypoints[1][0]), int(keypoints[1][1])),
-                 (0, 255, 0), 2)
-
-    # 绘制 bbox
+        cv2.line(vis, (int(keypoints[0][0]), int(keypoints[0][1])),
+                 (int(keypoints[1][0]), int(keypoints[1][1])), (0, 255, 0), 2)
    x, y, w, h = bbox
    cv2.rectangle(vis, (int(x), int(y)), (int(x + w), int(y + h)), (255, 0, 0), 2)
-
-    # 绘制中心点
    cv2.circle(vis, (int(cx), int(cy)), 10, (0, 255, 0), 3)
    cv2.circle(vis, (fx, fy), 12, (255, 255, 0), 3)
    cv2.arrowedLine(vis, (fx, fy), (int(cx), int(cy)), (255, 255, 0), 2, tipLength=0.05)
@ -177,30 +169,26 @@ def visualize_result(image, keypoints, bbox, fixed_point, offset_info, save_path
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
    cv2.putText(vis, f"DeltaY={dy_mm:+.1f}mm", (fx + 30, fy + 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
-
    cv2.imwrite(save_path, vis)

+# ====================== 主函数 ======================

 def calculate_offset_from_image(image_path, visualize=False):
-    from rknnlite.api import RKNNLite
-    
-    orig = cv2.imread(image_path)
+    orig=cv2.imread(image_path)
    if orig is None:
        return {'success': False, 'message': f'Failed to load image: {image_path}'}

    img_resized, scale, dx, dy = letterbox_resize(orig, IMG_SIZE)
    infer_img = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8)

-    rknn = RKNNLite(verbose=False)
-    ret = rknn.load_rknn(MODEL_PATH)
-    if ret != 0:
-        return {'success': False, 'message': 'Failed to load RKNN model'}
+    rknn = init_rknn_once(MODEL_PATH)
+    if rknn is None:
+        return {'success': False, 'message': 'Failed to init RKNN'}

    try:
-        rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
        outputs = rknn.inference([infer_img])
-    finally:
-        rknn.release()
+    except Exception as e:
+        return {'success': False, 'message': f'RKNN inference error: {str(e)}'}

    try:
        keypoints, class_id, obj_conf, bbox = parse_pose_outputs(outputs, dx=dx, dy=dy, scale=scale)
@ -222,16 +210,40 @@ def calculate_offset_from_image(image_path, visualize=False):
            'obj_conf': obj_conf, 'bbox': bbox,
            'message': 'Success'}

-
-# ====================== 使用示例 ======================
+# ====================== 示例调用 ======================
 if __name__ == "__main__":
-    image_path = "11.jpg"
-    result = calculate_offset_from_image(image_path, visualize=True)

-    if result['success']:
-        print(f"Center point: ({result['cx']:.1f}, {result['cy']:.1f})")
-        print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm")
-        print(f"Class ID: {result['class_id']}, Confidence: {result['obj_conf']:.3f}")
-        print(f"BBox: {result['bbox']}")
-    else:
-        print("Error:", result['message'])
+    time_start = time.time()
+    camera = CameraUtil()
+
+    for i in range(5):
+        time_start = time.time()
+        image_path=camera.save_img()
+        time_end = time.time()
+        print(f"Time cost22: {time_end - time_start:.3f} s")
+
+        # image_path = "11.jpg"
+        result = calculate_offset_from_image(image_path, visualize=False)
+    
+        
+        if result['success']:
+            print(f"Center point: ({result['cx']:.1f}, {result['cy']:.1f})")
+            print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm")
+            print(f"Class ID: {result['class_id']}, Confidence: {result['obj_conf']:.3f}")
+            print(f"BBox: {result['bbox']}")
+        else:
+            print("Error:", result['message'])
+        time_end = time.time()
+        print(f"Time cost: {time_end - time_start:.3f} s")
+
+    # image_path = "11.jpg"
+    # orig = cv2.imread(image_path)
+    # result = calculate_offset_from_image(orig, visualize=False)
+    # if result['success']:
+    #     print(f"Center point: ({result['cx']:.1f}, {result['cy']:.1f})")
+    #     print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm")
+    #     print(f"Class ID: {result['class_id']}, Confidence: {result['obj_conf']:.3f}")
+    #     print(f"BBox: {result['bbox']}")
+    # else:
+    #     print("Error:", result['message'])
+