first commit

This commit is contained in:
琉璃月光
2025-12-16 15:12:02 +08:00
commit 8506c3af79
227 changed files with 4060 additions and 0 deletions

View File

@ -0,0 +1,246 @@
# 下面代码基于你给出的 yolov5 示例做最小修改的适配版
import cv2
import numpy as np
OBJ_THRESH, NMS_THRESH, IMG_SIZE = 0.25, 0.45, 640
CLASSES = ("person", "bicycle", "car", "motorbike ", "aeroplane ", "bus ", "train", "truck ", "boat", "traffic light",
"fire hydrant", "stop sign ", "parking meter", "bench", "bird", "cat", "dog ", "horse ", "sheep", "cow", "elephant",
"bear", "zebra ", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
"baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife ",
"spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza ", "donut", "cake", "chair", "sofa",
"pottedplant", "bed", "diningtable", "toilet ", "tvmonitor", "laptop\t", "mouse\t", "remote ", "keyboard ", "cell phone", "microwave ",
"oven ", "toaster", "sink", "refrigerator ", "book", "clock", "vase", "scissors ", "teddy bear ", "hair drier", "toothbrush ")
# ---------- 保留你原来的辅助函数process/filter/nms/xywh2xyxy ----------
def xywh2xyxy(x):
    """Convert boxes from (cx, cy, w, h) to (x1, y1, x2, y2) corner form.

    x is an (N, 4) array; a new array is returned, the input is untouched.
    """
    out = np.copy(x)
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    out[:, 0] = x[:, 0] - half_w  # left
    out[:, 1] = x[:, 1] - half_h  # top
    out[:, 2] = x[:, 0] + half_w  # right
    out[:, 3] = x[:, 1] + half_h  # bottom
    return out
def process(input, mask, anchors, img_size=640):
    """Decode one YOLO output scale into absolute-pixel candidate boxes.

    Parameters:
        input: array of shape (grid_h, grid_w, 3, attrs), attrs >= 5 + num_classes;
            channels are [x, y, w, h, objectness, class probs...].
        mask: indices selecting this scale's 3 anchors out of `anchors`.
        anchors: list of (w, h) anchor pairs.
        img_size: network input resolution in pixels (default 640, mirrors the
            module-level IMG_SIZE).

    Returns:
        (boxes_xywh, box_confidence, box_class_probs) where boxes_xywh has
        shape (grid_h, grid_w, 3, 4) in center-x, center-y, width, height.
    """
    anchors = [anchors[i] for i in mask]
    grid_h, grid_w = map(int, input.shape[0:2])
    box_confidence = np.expand_dims(input[..., 4], axis=-1)
    box_class_probs = input[..., 5:]
    # YOLO "v5-style" decode: predicted offsets are in (-0.5, 1.5) per cell
    box_xy = input[..., :2] * 2 - 0.5
    # Build the per-cell index grid. NOTE: the repeat counts must be grid_h
    # for columns and grid_w for rows; the previous code tiled by grid_w/grid_h
    # respectively, which only worked for square grids and crashed otherwise.
    col = np.tile(np.arange(0, grid_w), grid_h).reshape(grid_h, grid_w)
    row = np.repeat(np.arange(0, grid_h), grid_w).reshape(grid_h, grid_w)
    col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    grid = np.concatenate((col, row), axis=-1)
    box_xy += grid
    # Scale cell coordinates to pixels (stride derived from grid height)
    box_xy *= int(img_size / grid_h)
    box_wh = pow(input[..., 2:4] * 2, 2)
    # Multiply by the selected anchors; pass unit (1, 1) anchors to neutralize
    box_wh = box_wh * anchors
    return np.concatenate((box_xy, box_wh), axis=-1), box_confidence, box_class_probs
def filter_boxes(boxes, box_confidences, box_class_probs, obj_thresh=0.25):
    """Filter raw candidates by objectness and best-class confidence.

    Parameters:
        boxes: array reshapeable to (N, 4) xywh candidates.
        box_confidences: array reshapeable to (N,) objectness scores.
        box_class_probs: array reshapeable to (N, num_classes).
        obj_thresh: confidence threshold; default mirrors the module-level
            OBJ_THRESH (0.25), so existing callers are unaffected.

    Returns:
        (boxes, classes, scores) for the surviving candidates, where score is
        objectness * best class probability.
    """
    boxes = boxes.reshape(-1, 4)
    box_confidences = box_confidences.reshape(-1)
    box_class_probs = box_class_probs.reshape(-1, box_class_probs.shape[-1])
    # First gate: objectness score
    _box_pos = np.where(box_confidences >= obj_thresh)
    boxes = boxes[_box_pos]
    box_confidences = box_confidences[_box_pos]
    box_class_probs = box_class_probs[_box_pos]
    # Second gate: probability of the most likely class
    class_max_score = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)
    _class_pos = np.where(class_max_score >= obj_thresh)
    return boxes[_class_pos], classes[_class_pos], (class_max_score * box_confidences)[_class_pos]
def nms_boxes(boxes, scores, nms_thresh=0.45):
    """Greedy per-class non-maximum suppression.

    Parameters:
        boxes: (N, 4) array in (x1, y1, x2, y2) corner form.
        scores: (N,) confidence scores.
        nms_thresh: IoU threshold above which a lower-scoring box is dropped;
            default mirrors the module-level NMS_THRESH (0.45).

    Returns:
        Integer array of indices to keep, ordered by descending score.
    """
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    areas = w * h
    order = scores.argsort()[::-1]
    keep = []
    eps = 1e-7  # guards against division by zero for degenerate boxes
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the current best box with all remaining candidates
        xx1 = np.maximum(x[i], x[order[1:]])
        yy1 = np.maximum(y[i], y[order[1:]])
        xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
        yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])
        w1 = np.maximum(0.0, xx2 - xx1 + 1e-5)
        h1 = np.maximum(0.0, yy2 - yy1 + 1e-5)
        inter = w1 * h1
        denom = (areas[i] + areas[order[1:]] - inter)
        denom = np.maximum(denom, eps)
        ovr = inter / denom
        # Keep only candidates with low enough overlap for the next round
        inds = np.where(ovr <= nms_thresh)[0]
        order = order[inds + 1]
    return np.array(keep)
# ---------- 关键:把你的 9 输出拼成原来 yolov5_post_process 需要的 input_data 格式 ----------
def yolov11_to_yolov5_style_input(outputs):
    """
    outputs: list of 9 tensors (1, C, H, W) in this order per your print:
        [reg80, cls80, obj80, reg40, cls40, obj40, reg20, cls20, obj20]
    Each scale is reshaped into a (H, W, 3, 5+num_classes) array with the
    single per-cell prediction duplicated across 3 "anchor" slots, so the
    existing yolov5-style post-processing can be reused unchanged. Pass unit
    anchors later so the duplication does not rescale box_wh.
    """
    input_data = []
    # walk the nine tensors in (reg, cls, obj) triplets, one per scale
    for base in range(0, 9, 3):
        reg_map = outputs[base][0]      # (C_reg, H, W); first 4 channels are x,y,w,h
        cls_map = outputs[base + 1][0]  # (num_classes, H, W)
        obj_map = outputs[base + 2][0]  # (1, H, W)
        # CHW -> HWC for the pieces we keep
        xywh = np.transpose(reg_map[0:4, :, :], (1, 2, 0))   # (H, W, 4)
        obj_hw = obj_map[0]                                  # (H, W)
        cls_hw = np.transpose(cls_map, (1, 2, 0))            # (H, W, num_classes)
        # one anchor slice: [x, y, w, h, obj, classes...]
        merged = np.concatenate([xywh, obj_hw[..., None], cls_hw], axis=-1)
        # duplicate across a fake anchor axis: (H, W, 3, 5+num_classes)
        input_data.append(np.repeat(merged[:, :, None, :], 3, axis=2))
    return input_data
def yolov5_post_process_adapted(input_data):
    """
    Reuse the yolov5-style post-processing pipeline on the converted tensors,
    but with neutral (1, 1) anchors so box_wh is not rescaled incorrectly.

    input_data: list of 3 arrays shaped (H, W, 3, 5+num_classes).
    Returns (boxes, classes, scores) or (None, None, None) when nothing passes.
    """
    # unit anchors keep the decoded width/height untouched inside process()
    unit_anchors = [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1],
                    [1, 1], [1, 1], [1, 1], [1, 1]]
    kept_boxes, kept_classes, kept_scores = [], [], []
    for scale in input_data:
        # process() expects (grid_h, grid_w, 3, attrs)
        raw_b, raw_conf, raw_probs = process(scale, [0, 1, 2], unit_anchors)
        b, c, s = filter_boxes(raw_b, raw_conf, raw_probs)
        kept_boxes.append(b)
        kept_classes.append(c)
        kept_scores.append(s)
    if len(kept_boxes) == 0:
        return None, None, None
    boxes = xywh2xyxy(np.concatenate(kept_boxes))
    classes = np.concatenate(kept_classes)
    scores = np.concatenate(kept_scores)
    # class-wise NMS
    final_boxes, final_classes, final_scores = [], [], []
    for cls_id in set(classes):
        same_cls = np.where(classes == cls_id)
        cls_boxes = boxes[same_cls]
        cls_scores = scores[same_cls]
        survivors = nms_boxes(cls_boxes, cls_scores)
        final_boxes.append(cls_boxes[survivors])
        final_classes.append(classes[same_cls][survivors])
        final_scores.append(cls_scores[survivors])
    if not final_classes and not final_scores:
        return None, None, None
    return np.concatenate(final_boxes), np.concatenate(final_classes), np.concatenate(final_scores)
# ---------- draw 保持原样 ----------
def draw(image, boxes, scores, classes):
    """Draw detection rectangles and class/score labels onto image in place.

    boxes are (x1, y1, x2, y2); classes index into the module CLASSES tuple.
    """
    for box, score, cl in zip(boxes, scores, classes):
        x1, y1, x2, y2 = box
        x1 = int(x1)
        y1 = int(y1)
        cv2.rectangle(image, (x1, y1), (int(x2), int(y2)), (255, 0, 0), 2)
        label = '{0} {1:.2f}'.format(CLASSES[cl], score)
        # label sits just above the box's top-left corner
        cv2.putText(image, label,
                    (x1, y1 - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 2)
# ---------- 最终 myFunc替换你原来的 myFunc ----------
def myFunc(rknn_lite, IMG):
    """Run one BGR frame through the RKNN model and return it (BGR) with
    car detections (COCO class id 2) drawn on top.

    NOTE(review): the frame is resized directly to IMG_SIZE x IMG_SIZE (no
    letterboxing), so boxes are in the resized image's coordinates.
    """
    # BGR -> RGB, resize to the model input, HWC -> CHW, add batch dim
    rgb = cv2.cvtColor(IMG, cv2.COLOR_BGR2RGB)
    rgb = cv2.resize(rgb, (IMG_SIZE, IMG_SIZE))          # (640, 640, 3)
    chw = np.transpose(rgb, (2, 0, 1))                   # (3, 640, 640)
    batch = np.expand_dims(chw, axis=0).astype(np.uint8)  # (1, 3, 640, 640)
    outputs = rknn_lite.inference(inputs=[batch])
    if outputs is None:
        print("⚠️ Inference failed, skipping frame.")
        return cv2.cvtColor(batch.squeeze().transpose(1, 2, 0), cv2.COLOR_RGB2BGR)
    # 9-tensor output -> yolov5-style scales -> decoded detections
    input_data = yolov11_to_yolov5_style_input(outputs)
    boxes, classes, scores = yolov5_post_process_adapted(input_data)
    # keep only cars (COCO id 2)
    if boxes is not None:
        car_idx = np.where(classes == 2)[0]
        if car_idx.size == 0:
            boxes, classes, scores = None, None, None
        else:
            boxes = boxes[car_idx]
            classes = classes[car_idx]
            scores = scores[car_idx]
    # back to BGR for OpenCV display
    frame = cv2.cvtColor(batch.squeeze().transpose(1, 2, 0), cv2.COLOR_RGB2BGR)
    if boxes is not None:
        draw(frame, boxes, scores, classes)
    return frame