# Minimal-change adaptation of a yolov5-style post-processing pipeline so it
# can consume the 9-tensor output layout of a YOLO11-style model
# ([reg, cls, obj] per detection scale, strides 8 / 16 / 32).
import cv2
import numpy as np

OBJ_THRESH = 0.25   # objectness / final class-score threshold
NMS_THRESH = 0.45   # IoU threshold for per-class NMS
IMG_SIZE = 640      # square model input resolution

# COCO class names. NOTE(review): several entries carry trailing spaces/tabs
# (e.g. "motorbike ", "laptop\t"); they are kept byte-identical in case
# anything downstream compares against these exact strings.
CLASSES = ("person", "bicycle", "car", "motorbike ", "aeroplane ", "bus ",
           "train", "truck ", "boat", "traffic light", "fire hydrant",
           "stop sign ", "parking meter", "bench", "bird", "cat", "dog ",
           "horse ", "sheep", "cow", "elephant", "bear", "zebra ", "giraffe",
           "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
           "skis", "snowboard", "sports ball", "kite", "baseball bat",
           "baseball glove", "skateboard", "surfboard", "tennis racket",
           "bottle", "wine glass", "cup", "fork", "knife ", "spoon", "bowl",
           "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
           "hot dog", "pizza ", "donut", "cake", "chair", "sofa",
           "pottedplant", "bed", "diningtable", "toilet ", "tvmonitor",
           "laptop\t", "mouse\t", "remote ", "keyboard ", "cell phone",
           "microwave ", "oven ", "toaster", "sink", "refrigerator ", "book",
           "clock", "vase", "scissors ", "teddy bear ", "hair drier",
           "toothbrush ")


# ---------- helper functions (process / filter / nms / xywh2xyxy) ----------

def xywh2xyxy(x):
    """Convert (N, 4) boxes from center-x/center-y/width/height to
    x1/y1/x2/y2 corner format. Returns a new array; `x` is not modified."""
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # x1 = cx - w/2
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # y1 = cy - h/2
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # x2 = cx + w/2
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # y2 = cy + h/2
    return y


def process(input, mask, anchors):
    """Decode one detection scale into absolute-pixel boxes.

    Args:
        input: array of shape (grid_h, grid_w, 3, attrs) with
            attrs >= 5 + num_classes, laid out [x, y, w, h, obj, cls...].
        mask: indices selecting this scale's 3 anchors from `anchors`.
        anchors: list of (w, h) anchor pairs.

    Returns:
        (boxes_xywh, box_confidence, box_class_probs) where boxes_xywh has
        shape (grid_h, grid_w, 3, 4) in input-image pixels.
    """
    anchors = [anchors[i] for i in mask]
    grid_h, grid_w = map(int, input.shape[0:2])

    box_confidence = np.expand_dims(input[..., 4], axis=-1)
    box_class_probs = input[..., 5:]

    # YOLOv5-style xy decode: cell-relative prediction scaled by 2, offset
    # by -0.5, then shifted by the cell's grid coordinate.
    box_xy = input[..., :2] * 2 - 0.5

    # Build the (grid_h, grid_w, 3, 2) grid of cell coordinates.
    # BUGFIX: the original tiled `col` grid_w times and `row` grid_h times,
    # which only happens to work for square grids; tile along the opposite
    # dimension so non-square grids decode correctly too.
    col = np.tile(np.arange(0, grid_w), grid_h).reshape(grid_h, grid_w)
    row = np.tile(np.arange(0, grid_h).reshape(-1, 1), (1, grid_w))
    col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    grid = np.concatenate((col, row), axis=-1)

    box_xy += grid
    box_xy *= int(IMG_SIZE / grid_h)  # stride: 8 / 16 / 32 for a 640 input

    # wh decode: (2 * pred)^2 scaled by the anchor. Unit (1, 1) anchors
    # leave the network's wh untouched (used by the adapted pipeline below).
    box_wh = pow(input[..., 2:4] * 2, 2)
    box_wh = box_wh * anchors

    return np.concatenate((box_xy, box_wh), axis=-1), box_confidence, box_class_probs


def filter_boxes(boxes, box_confidences, box_class_probs):
    """Flatten all cells/anchors and keep detections above OBJ_THRESH.

    Returns:
        (boxes, classes, scores): surviving boxes (N, 4) in xywh, their
        argmax class ids, and score = class_prob * objectness.
    """
    boxes = boxes.reshape(-1, 4)
    box_confidences = box_confidences.reshape(-1)
    box_class_probs = box_class_probs.reshape(-1, box_class_probs.shape[-1])

    # First gate on objectness alone.
    _box_pos = np.where(box_confidences >= OBJ_THRESH)
    boxes = boxes[_box_pos]
    box_confidences = box_confidences[_box_pos]
    box_class_probs = box_class_probs[_box_pos]

    # Then gate on the best class probability.
    class_max_score = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)
    _class_pos = np.where(class_max_score >= OBJ_THRESH)

    return (boxes[_class_pos], classes[_class_pos],
            (class_max_score * box_confidences)[_class_pos])


def nms_boxes(boxes, scores):
    """Greedy IoU-based non-max suppression.

    Args:
        boxes: (N, 4) boxes in x1/y1/x2/y2 format.
        scores: (N,) confidence scores.

    Returns:
        Array of indices into `boxes` to keep, highest score first.
    """
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]

    areas = w * h
    order = scores.argsort()[::-1]  # descending by score
    keep = []
    eps = 1e-7  # guards the IoU division against a zero denominator
    while order.size > 0:
        i = order[0]
        keep.append(i)

        # Intersection of the top box with all remaining boxes.
        xx1 = np.maximum(x[i], x[order[1:]])
        yy1 = np.maximum(y[i], y[order[1:]])
        xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
        yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])
        w1 = np.maximum(0.0, xx2 - xx1 + 1e-5)
        h1 = np.maximum(0.0, yy2 - yy1 + 1e-5)
        inter = w1 * h1

        denom = np.maximum(areas[i] + areas[order[1:]] - inter, eps)
        ovr = inter / denom
        inds = np.where(ovr <= NMS_THRESH)[0]
        order = order[inds + 1]  # +1: `inds` indexes order[1:]
    return np.array(keep)


# ---------- repack the 9 raw outputs into yolov5-style input_data ----------

def yolov11_to_yolov5_style_input(outputs):
    """Repack the model's 9 raw outputs into the 3-scale layout that the
    yolov5-style post-processing expects.

    Args:
        outputs: list of 9 tensors of shape (1, C, H, W), ordered per scale
            as [reg80, cls80, obj80, reg40, cls40, obj40, reg20, cls20, obj20].

    Returns:
        list of 3 arrays shaped (H, W, 3, 5 + num_classes).

    NOTE(review): only the first 4 channels of each `reg` tensor are used as
    x, y, w, h. If the head is DFL-encoded (e.g. 64 reg channels), those are
    distribution bins, not raw xywh — confirm against the model export.
    The same slice is repeated for all 3 "anchors"; the caller neutralizes
    anchor scaling by passing unit (1, 1) anchors.
    """
    input_data = []
    for i in range(0, 9, 3):  # one iteration per detection scale
        reg = outputs[i][0]      # (C_reg, H, W)
        cls = outputs[i + 1][0]  # (num_classes, H, W)
        obj = outputs[i + 2][0]  # (1, H, W)

        # Assume the first 4 reg channels are x, y, w, h per cell.
        xywh = np.transpose(reg[0:4, :, :], (1, 2, 0))  # (H, W, 4)
        obj_hw = obj[0]                                 # (H, W)
        cls_hw = np.transpose(cls, (1, 2, 0))           # (H, W, num_classes)

        # One anchor slice [x, y, w, h, obj, cls...] -> (H, W, 5 + classes),
        # repeated 3x to satisfy the yolov5 (H, W, 3, attrs) shape.
        slice_hw = np.concatenate([xywh, obj_hw[..., None], cls_hw], axis=-1)
        input_data.append(np.repeat(slice_hw[:, :, None, :], 3, axis=2))
    return input_data


def yolov5_post_process_adapted(input_data):
    """Run the yolov5-style post-processing over the repacked scales.

    Uses neutral (1, 1) anchors so that `process()` does not rescale wh.

    Args:
        input_data: list of 3 arrays shaped (H, W, 3, 5 + num_classes).

    Returns:
        (boxes, classes, scores) with boxes in x1/y1/x2/y2 pixels, or
        (None, None, None) when nothing survives the thresholds.
    """
    # Neutral anchors: 9 unit pairs, so every mask resolves to (1, 1).
    anchors = [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1],
               [1, 1], [1, 1], [1, 1], [1, 1]]

    boxes, classes, scores = [], [], []
    for input in input_data:
        # process() expects one (grid_h, grid_w, 3, attrs) scale.
        b, c, s = process(input, [0, 1, 2], anchors)
        b, c, s = filter_boxes(b, c, s)
        boxes.append(b)
        classes.append(c)
        scores.append(s)

    boxes = np.concatenate(boxes)
    # BUGFIX: the original tested `len(boxes) == 0` on the per-scale list,
    # which is always 3 entries (dead check); test the concatenated
    # detections instead.
    if boxes.size == 0:
        return None, None, None
    boxes = xywh2xyxy(boxes)
    classes = np.concatenate(classes)
    scores = np.concatenate(scores)

    # Class-wise NMS.
    nboxes, nclasses, nscores = [], [], []
    for cls_id in set(classes):
        inds = np.where(classes == cls_id)
        b = boxes[inds]
        c = classes[inds]
        s = scores[inds]
        keep = nms_boxes(b, s)
        nboxes.append(b[keep])
        nclasses.append(c[keep])
        nscores.append(s[keep])

    if not nclasses and not nscores:
        return None, None, None
    return np.concatenate(nboxes), np.concatenate(nclasses), np.concatenate(nscores)


# ---------- drawing ----------

def draw(image, boxes, scores, classes):
    """Draw detection boxes and class labels onto `image` in place.

    NOTE(review): the original unpacked boxes as (top, left, right, bottom),
    but the values are actually (x1, y1, x2, y2) corners; locals renamed for
    clarity, drawing behavior unchanged.
    """
    for box, score, cl in zip(boxes, scores, classes):
        x1, y1, x2, y2 = box
        x1 = int(x1)
        y1 = int(y1)
        cv2.rectangle(image, (x1, y1), (int(x2), int(y2)), (255, 0, 0), 2)
        cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
                    (x1, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                    (0, 0, 255), 2)


# ---------- per-frame entry point ----------

def myFunc(rknn_lite, IMG):
    """Full per-frame pipeline: preprocess, infer, post-process, draw.

    Args:
        rknn_lite: object exposing `.inference(inputs=[...])` that returns a
            list of 9 output tensors (or None on failure).
        IMG: input frame in BGR (OpenCV convention).

    Returns:
        BGR frame of size (IMG_SIZE, IMG_SIZE) with detections drawn.
    """
    # 1. BGR -> RGB (the model expects RGB input).
    IMG = cv2.cvtColor(IMG, cv2.COLOR_BGR2RGB)
    # 2. Resize to model input size. NOTE(review): a plain resize stretches
    # the aspect ratio; letterboxing would preserve it if accuracy matters.
    IMG = cv2.resize(IMG, (IMG_SIZE, IMG_SIZE))       # (640, 640, 3)
    # 3. HWC -> CHW, then add the batch dimension: (1, 3, 640, 640).
    IMG = np.transpose(IMG, (2, 0, 1))
    IMG_in = np.expand_dims(IMG, axis=0).astype(np.uint8)

    # 4. Inference.
    outputs = rknn_lite.inference(inputs=[IMG_in])
    if outputs is None:
        print("⚠️ Inference failed, skipping frame.")
        return cv2.cvtColor(IMG_in.squeeze().transpose(1, 2, 0),
                            cv2.COLOR_RGB2BGR)

    # 5. Repack the 9 raw outputs and run the adapted post-processing.
    input_data = yolov11_to_yolov5_style_input(outputs)
    boxes, classes, scores = yolov5_post_process_adapted(input_data)

    # 6. Keep only "car" detections (COCO class id 2).
    if boxes is not None:
        keep_car = np.where(classes == 2)[0]
        if keep_car.size == 0:
            boxes, classes, scores = None, None, None
        else:
            boxes = boxes[keep_car]
            classes = classes[keep_car]
            scores = scores[keep_car]

    # 7. Back to BGR for OpenCV display.
    IMG_vis = cv2.cvtColor(IMG_in.squeeze().transpose(1, 2, 0),
                           cv2.COLOR_RGB2BGR)
    if boxes is not None:
        draw(IMG_vis, boxes, scores, classes)
    return IMG_vis