import cv2
import numpy as np
from rknnlite.api import RKNNLite

MODEL_PATH = "detect.rknn"
CLASS_NAMES = ["bag"]  # single-class model


class Yolo11Detector:
    """YOLOv11 single-class detector running on a Rockchip NPU via RKNNLite.

    Loads a pre-converted ``.rknn`` model, letterboxes incoming BGR frames
    to the model input size, and decodes the raw output tensor into
    NMS-filtered detections in original-image coordinates.
    """

    def __init__(self, model_path):
        """Load the RKNN model and initialise the NPU runtime.

        Args:
            model_path: Path to the compiled ``.rknn`` model file.

        Raises:
            RuntimeError: If the model fails to load or the runtime
                fails to initialise.  (Explicit raises instead of
                ``assert`` — asserts are stripped under ``python -O``.)
        """
        self.rknn = RKNNLite(verbose=False)

        if self.rknn.load_rknn(model_path) != 0:
            raise RuntimeError("Load RKNN model failed")

        # Initialise the runtime pinned to NPU core 0.
        if self.rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0) != 0:
            raise RuntimeError("Init runtime failed")

        # Square model input resolution.
        self.input_size = 640

        # YOLO anchors keyed by stride — must match the trained model.
        self.anchors = {
            8: [[10, 13], [16, 30], [33, 23]],
            16: [[30, 61], [62, 45], [59, 119]],
            32: [[116, 90], [156, 198], [373, 326]],
        }

    def preprocess(self, img):
        """Letterbox a BGR frame to the model input.

        Args:
            img: BGR image as an ``ndarray`` of shape (H, W, 3).

        Returns:
            Tuple of (input tensor of shape (1, S, S, 3) uint8 RGB,
            resize scale, horizontal pad ``dw``, vertical pad ``dh``).
        """
        h, w = img.shape[:2]
        scale = min(self.input_size / w, self.input_size / h)
        new_w, new_h = int(w * scale), int(h * scale)
        img_resized = cv2.resize(img, (new_w, new_h))

        # Pad with the conventional YOLO grey value (114).
        canvas = np.full(
            (self.input_size, self.input_size, 3), 114, dtype=np.uint8
        )
        dw = (self.input_size - new_w) // 2
        dh = (self.input_size - new_h) // 2
        canvas[dh:dh + new_h, dw:dw + new_w, :] = img_resized

        img_rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
        return np.expand_dims(img_rgb, 0).astype(np.uint8), scale, dw, dh

    def postprocess(self, outputs, scale, dw, dh,
                    conf_thresh=0.25, iou_thresh=0.45):
        """Decode the raw model output into NMS-filtered detections.

        Args:
            outputs: List of output tensors; ``outputs[0]`` is assumed to
                be shaped ``[1, N, C]`` with rows
                ``[cx, cy, w, h, obj_conf, cls_conf]``.
            scale, dw, dh: Letterbox parameters from :meth:`preprocess`,
                used to map boxes back to original-image coordinates.
            conf_thresh: Minimum objectness / combined score to keep.
            iou_thresh: IoU threshold for non-maximum suppression.

        Returns:
            List of dicts with keys ``box`` (x1, y1, x2, y2), ``score``
            and ``class_id``.
        """
        # BUG FIX: the original used outputs[0].shape[1], which for a
        # [1, N, C] tensor is N (the prediction count) and scrambles rows.
        # Flatten to one row per prediction using the channel axis instead.
        preds = outputs[0].reshape(-1, outputs[0].shape[-1])

        boxes, scores, class_ids = [], [], []
        for p in preds:
            obj_conf = p[4]
            if obj_conf < conf_thresh:
                continue
            # Single-class model: one class-confidence channel at index 5.
            score = obj_conf * p[5]
            if score < conf_thresh:
                continue

            cx, cy, w, h = p[:4]
            # Undo the letterbox: remove padding, then rescale.
            x1 = (cx - w / 2 - dw) / scale
            y1 = (cy - h / 2 - dh) / scale
            x2 = (cx + w / 2 - dw) / scale
            y2 = (cy + h / 2 - dh) / scale

            boxes.append([x1, y1, x2, y2])
            scores.append(score)
            class_ids.append(0)  # single class

        if not boxes:
            return []

        boxes = np.array(boxes)
        scores = np.array(scores)
        class_ids = np.array(class_ids)

        # Greedy NMS over score-sorted candidates.
        idxs = np.argsort(scores)[::-1]
        keep = []
        while len(idxs) > 0:
            i = idxs[0]
            keep.append(i)
            if len(idxs) == 1:
                break

            x1, y1, x2, y2 = boxes[i]
            xx1 = np.maximum(x1, boxes[idxs[1:], 0])
            yy1 = np.maximum(y1, boxes[idxs[1:], 1])
            xx2 = np.minimum(x2, boxes[idxs[1:], 2])
            yy2 = np.minimum(y2, boxes[idxs[1:], 3])

            inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
            area_i = (x2 - x1) * (y2 - y1)
            area_j = (
                (boxes[idxs[1:], 2] - boxes[idxs[1:], 0])
                * (boxes[idxs[1:], 3] - boxes[idxs[1:], 1])
            )
            iou = inter / (area_i + area_j - inter + 1e-6)
            idxs = idxs[1:][iou < iou_thresh]

        return [
            {"box": boxes[i], "score": scores[i], "class_id": class_ids[i]}
            for i in keep
        ]

    def detect(self, img):
        """Run the full pipeline on one BGR frame and return detections."""
        img_data, scale, dw, dh = self.preprocess(img)
        outputs = self.rknn.inference([img_data])
        return self.postprocess(outputs, scale, dw, dh)

    def release(self):
        """Release the NPU runtime resources."""
        self.rknn.release()


if __name__ == "__main__":
    detector = Yolo11Detector(MODEL_PATH)
    cap = cv2.VideoCapture(0)  # can be replaced with an image/video path
    # try/finally so the camera, NPU runtime and windows are released even
    # if the loop raises (the original leaked all three on error).
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            for r in detector.detect(frame):
                x1, y1, x2, y2 = map(int, r["box"])
                label = f"{CLASS_NAMES[r['class_id']]} {r['score']:.2f}"
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, label, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

            cv2.imshow("YOLOv11 Detection", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        detector.release()
        cap.release()
        cv2.destroyAllWindows()