# Adapted with minimal changes from the provided yolov5 post-processing example.
import cv2

import numpy as np

# OBJ_THRESH - minimum objectness / best-class score to keep a detection
# NMS_THRESH - IoU threshold for non-maximum suppression
# IMG_SIZE   - square model input resolution in pixels
OBJ_THRESH, NMS_THRESH, IMG_SIZE = 0.25, 0.45, 640

# COCO 80-class label names, indexed by class id (id 2 == "car").
# NOTE(review): several entries carry stray trailing spaces/tabs
# (e.g. "motorbike ", "laptop\t"); kept byte-identical here since they only
# affect the on-screen label text — consider cleaning them up.
CLASSES = ("person", "bicycle", "car", "motorbike ", "aeroplane ", "bus ", "train", "truck ", "boat", "traffic light",
"fire hydrant", "stop sign ", "parking meter", "bench", "bird", "cat", "dog ", "horse ", "sheep", "cow", "elephant",
"bear", "zebra ", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
"baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife ",
"spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza ", "donut", "cake", "chair", "sofa",
"pottedplant", "bed", "diningtable", "toilet ", "tvmonitor", "laptop\t", "mouse\t", "remote ", "keyboard ", "cell phone", "microwave ",
"oven ", "toaster", "sink", "refrigerator ", "book", "clock", "vase", "scissors ", "teddy bear ", "hair drier", "toothbrush ")
# ---------- helper functions kept from the original (process/filter/nms/xywh2xyxy) ----------
def xywh2xyxy(x):
    """Convert (cx, cy, w, h) boxes to (x1, y1, x2, y2) corner form."""
    cx, cy = x[:, 0], x[:, 1]
    half_w, half_h = x[:, 2] / 2, x[:, 3] / 2
    out = np.copy(x)
    out[:, 0] = cx - half_w
    out[:, 1] = cy - half_h
    out[:, 2] = cx + half_w
    out[:, 3] = cy + half_h
    return out
def process(input, mask, anchors, img_size=640):
    """Decode one YOLO output scale into absolute-pixel boxes.

    Args:
        input: (grid_h, grid_w, 3, attrs) array with attrs >= 5 + num_classes,
            laid out as [x, y, w, h, obj, cls...] per anchor.
        mask: indices selecting this scale's 3 anchors from `anchors`.
        anchors: list of (w, h) anchor pairs (unit anchors neutralize scaling).
        img_size: square network input resolution; defaults to 640, matching
            the module-level IMG_SIZE (kept as a parameter so the decode is
            reusable and testable in isolation).

    Returns:
        (boxes, box_confidence, box_class_probs) where boxes has shape
        (grid_h, grid_w, 3, 4) as (cx, cy, w, h) in input-image pixels.
    """
    anchors = np.array([anchors[i] for i in mask], dtype=np.float32)
    grid_h, grid_w = map(int, input.shape[0:2])

    # objectness kept with a trailing singleton axis for broadcasting
    box_confidence = np.expand_dims(input[..., 4], axis=-1)
    box_class_probs = input[..., 5:]

    # YOLOv5/YOLO11-style xy decode: sigmoid output stretched to (-0.5, 1.5)
    box_xy = input[..., :2] * 2 - 0.5

    # Build the per-cell offset grid. np.meshgrid handles non-square grids
    # correctly — the original tile/reshape construction only worked when
    # grid_h == grid_w and crashed otherwise.
    col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
    grid = np.stack((col, row), axis=-1).reshape(grid_h, grid_w, 1, 2)
    grid = np.repeat(grid, 3, axis=-2)  # one copy per anchor

    box_xy += grid
    box_xy *= img_size // grid_h  # stride in pixels for this scale

    # wh decode: (2 * sigmoid)^2, scaled by the anchor size
    box_wh = (input[..., 2:4] * 2) ** 2
    box_wh = box_wh * anchors  # unit (1, 1) anchors leave wh unchanged

    return np.concatenate((box_xy, box_wh), axis=-1), box_confidence, box_class_probs
def filter_boxes(boxes, box_confidences, box_class_probs, obj_thresh=None):
    """Filter decoded boxes by objectness and best-class score.

    Args:
        boxes: (..., 4) boxes; flattened to (-1, 4).
        box_confidences: objectness scores; flattened to (-1,).
        box_class_probs: per-class scores; flattened to (-1, num_classes).
        obj_thresh: score threshold; defaults to the module-level OBJ_THRESH
            (parameterized so the function is reusable with other thresholds).

    Returns:
        (boxes, classes, scores) for detections passing both thresholds,
        where scores = best class probability * objectness.
    """
    if obj_thresh is None:
        obj_thresh = OBJ_THRESH

    boxes = boxes.reshape(-1, 4)
    box_confidences = box_confidences.reshape(-1)
    box_class_probs = box_class_probs.reshape(-1, box_class_probs.shape[-1])

    # first pass: drop cells with low objectness
    _box_pos = np.where(box_confidences >= obj_thresh)
    boxes = boxes[_box_pos]
    box_confidences = box_confidences[_box_pos]
    box_class_probs = box_class_probs[_box_pos]

    # second pass: drop cells whose best class score is also below threshold
    class_max_score = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)
    _class_pos = np.where(class_max_score >= obj_thresh)

    return boxes[_class_pos], classes[_class_pos], (class_max_score * box_confidences)[_class_pos]
def nms_boxes(boxes, scores, nms_thresh=None):
    """Greedy IoU-based non-maximum suppression.

    Args:
        boxes: (N, 4) boxes in (x1, y1, x2, y2) corner form.
        scores: (N,) confidence scores.
        nms_thresh: IoU threshold above which a box is suppressed; defaults
            to the module-level NMS_THRESH (parameterized for reusability).

    Returns:
        Array of indices of the kept boxes, highest score first.
    """
    if nms_thresh is None:
        nms_thresh = NMS_THRESH

    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]

    areas = w * h
    order = scores.argsort()[::-1]  # candidate indices, best score first

    keep = []
    eps = 1e-7
    while order.size > 0:
        i = order[0]
        keep.append(i)

        # intersection of the current best box with all remaining candidates
        xx1 = np.maximum(x[i], x[order[1:]])
        yy1 = np.maximum(y[i], y[order[1:]])
        xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
        yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])

        w1 = np.maximum(0.0, xx2 - xx1 + 1e-5)
        h1 = np.maximum(0.0, yy2 - yy1 + 1e-5)
        inter = w1 * h1

        # eps guard avoids divide-by-zero on degenerate (zero-area) boxes
        denom = np.maximum(areas[i] + areas[order[1:]] - inter, eps)
        ovr = inter / denom
        inds = np.where(ovr <= nms_thresh)[0]
        order = order[inds + 1]  # +1: inds index into order[1:]
    return np.array(keep)
# ---------- key step: stitch the 9 raw outputs into the input_data format the original yolov5_post_process expects ----------
def yolov11_to_yolov5_style_input(outputs):
    """Convert 9 raw YOLO11 head tensors to yolov5-style per-scale arrays.

    Args:
        outputs: list of 9 tensors (1, C, H, W), ordered per scale as
            [reg80, cls80, obj80, reg40, cls40, obj40, reg20, cls20, obj20].

    Returns:
        List of 3 arrays, one per scale, each shaped
        (H, W, 3, 5 + num_classes). The same per-cell slice is repeated
        across the 3 anchor positions so the existing yolov5_post_process
        pipeline can be reused; unit (1, 1) anchors are used downstream so
        the repetition does not rescale box_wh.
    """
    input_data = []
    # walk the outputs three at a time: (reg, cls, obj) per scale
    for i in range(0, 9, 3):
        reg = outputs[i][0]      # (64, H, W) regression head
        cls = outputs[i + 1][0]  # (num_classes, H, W)
        obj = outputs[i + 2][0]  # (1, H, W)

        # NOTE(review): assumes the first 4 regression channels are already
        # decoded x, y, w, h per cell — confirm against the exported model.
        xywh = np.transpose(reg[0:4, :, :], (1, 2, 0))  # (H, W, 4)
        obj_hw = obj[0, :, :]                           # (H, W)
        cls_hw = np.transpose(cls, (1, 2, 0))           # (H, W, num_classes)

        # one anchor slice [x, y, w, h, obj, cls...] -> (H, W, 5 + num_classes)
        slice_hw = np.concatenate([xywh, obj_hw[..., None], cls_hw], axis=-1)

        # replicate across 3 anchors -> (H, W, 3, 5 + num_classes)
        input_data.append(np.repeat(slice_hw[:, :, None, :], 3, axis=2))

    return input_data
def yolov5_post_process_adapted(input_data):
    """
    Reuse the original yolov5_post_process pipeline, but with unit (1, 1)
    anchors so box_wh is not rescaled incorrectly.
    """
    # kept only for compatibility with the original signature; process()
    # receives the [0, 1, 2] mask directly below
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    # neutral anchors: multiplying box_wh by (1, 1) leaves it unchanged
    anchors = [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1],
               [1, 1], [1, 1], [1, 1], [1, 1]]

    boxes, classes, scores = [], [], []
    # each entry of input_data is one scale shaped (H, W, 3, 85)
    for scale in input_data:
        b, c, s = process(scale, [0, 1, 2], anchors)
        b, c, s = filter_boxes(b, c, s)
        boxes.append(b)
        classes.append(c)
        scores.append(s)

    if len(boxes) == 0:
        return None, None, None

    boxes = xywh2xyxy(np.concatenate(boxes))
    classes = np.concatenate(classes)
    scores = np.concatenate(scores)

    # class-wise NMS: suppress overlaps only within the same class
    nboxes, nclasses, nscores = [], [], []
    for cls_id in set(classes):
        inds = np.where(classes == cls_id)
        b, c, s = boxes[inds], classes[inds], scores[inds]

        keep = nms_boxes(b, s)

        nboxes.append(b[keep])
        nclasses.append(c[keep])
        nscores.append(s[keep])

    if not nclasses and not nscores:
        return None, None, None

    return np.concatenate(nboxes), np.concatenate(nclasses), np.concatenate(nscores)
||
# ---------- draw 保持原样 ----------
|
||
def draw(image, boxes, scores, classes):
|
||
for box, score, cl in zip(boxes, scores, classes):
|
||
top, left, right, bottom = box
|
||
top = int(top)
|
||
left = int(left)
|
||
cv2.rectangle(image, (top, left), (int(right), int(bottom)), (255, 0, 0), 2)
|
||
cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
|
||
(top, left - 6),
|
||
cv2.FONT_HERSHEY_SIMPLEX,
|
||
0.6, (0, 0, 255), 2)
|
||
|
||
|
||
# ---------- final myFunc (replaces the original myFunc) ----------
def myFunc(rknn_lite, IMG):
    """Run one frame through the RKNN model and return a BGR image with the
    detected cars (COCO class id 2) drawn on it."""
    # BGR -> RGB, then resize to the square model input (no letterboxing)
    IMG = cv2.cvtColor(IMG, cv2.COLOR_BGR2RGB)
    IMG = cv2.resize(IMG, (IMG_SIZE, IMG_SIZE))  # (640, 640, 3)

    # HWC -> CHW, then add the batch axis: (1, 3, 640, 640)
    IMG = np.transpose(IMG, (2, 0, 1))
    IMG_in = np.expand_dims(IMG, axis=0).astype(np.uint8)

    # inference on the NPU
    outputs = rknn_lite.inference(inputs=[IMG_in])

    if outputs is None:
        print("⚠️ Inference failed, skipping frame.")
        return cv2.cvtColor(IMG_in.squeeze().transpose(1, 2, 0), cv2.COLOR_RGB2BGR)

    # 9 raw tensors -> yolov5-style input_data -> decoded detections
    input_data = yolov11_to_yolov5_style_input(outputs)
    boxes, classes, scores = yolov5_post_process_adapted(input_data)

    # keep only cars (COCO id 2); None-out everything if no car survives
    if boxes is not None:
        keep_car = np.where(classes == 2)[0]
        if keep_car.size == 0:
            boxes, classes, scores = None, None, None
        else:
            boxes = boxes[keep_car]
            classes = classes[keep_car]
            scores = scores[keep_car]

    # back to BGR for OpenCV display
    IMG_vis = cv2.cvtColor(IMG_in.squeeze().transpose(1, 2, 0), cv2.COLOR_RGB2BGR)
    if boxes is not None:
        draw(IMG_vis, boxes, scores, classes)
    return IMG_vis