Files
琉璃月光 1ec9bbab60 2.0
2025-10-22 17:52:29 +08:00

135 lines
4.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import cv2
import numpy as np
from rknnlite.api import RKNNLite
# Path of the compiled RKNN model loaded by Yolo11Detector below.
MODEL_PATH = "detect.rknn"
# Label table indexed by class_id; the detector emits class_id 0 only.
CLASS_NAMES = ["bag"] # single class
class Yolo11Detector:
    """Single-class YOLO detector backed by an RKNN model on a Rockchip NPU.

    Pipeline: letterbox a BGR frame to 640x640 RGB, run RKNNLite inference,
    then decode one output tensor of shape [1, N, C] whose rows are
    (cx, cy, w, h, obj_conf, cls_conf).
    NOTE(review): C == 6 (single class) is assumed — confirm against the
    exported model's actual output node.
    """

    def __init__(self, model_path):
        """Load the RKNN model and bind the runtime to NPU core 0.

        Args:
            model_path: path to a compiled .rknn model file.

        Raises:
            RuntimeError: if loading the model or initializing the runtime
                fails.  (The original used `assert`, which is silently
                stripped under `python -O`.)
        """
        self.rknn = RKNNLite(verbose=False)
        # Load the compiled RKNN model from disk.
        if self.rknn.load_rknn(model_path) != 0:
            raise RuntimeError("❌ Load RKNN model failed")
        # Initialize the runtime on NPU core 0.
        if self.rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0) != 0:
            raise RuntimeError("❌ Init runtime failed")
        # Square model input resolution.
        self.input_size = 640
        # YOLO anchors keyed by stride.  Kept for backward compatibility,
        # but the decode path below does not use them — it assumes the
        # model already outputs final (cx, cy, w, h) boxes.
        self.anchors = {
            8: [[10, 13], [16, 30], [33, 23]],
            16: [[30, 61], [62, 45], [59, 119]],
            32: [[116, 90], [156, 198], [373, 326]]
        }

    def preprocess(self, img):
        """Letterbox a BGR image into the square model input and convert to RGB.

        Args:
            img: HxWx3 BGR uint8 image (OpenCV convention).

        Returns:
            Tuple (tensor, scale, dw, dh) where tensor is a [1, S, S, 3]
            uint8 RGB array and (scale, dw, dh) are the letterbox parameters
            needed by postprocess() to map boxes back to image coordinates.
        """
        h, w = img.shape[:2]
        # Uniform scale that fits the whole image inside the square input.
        scale = min(self.input_size / w, self.input_size / h)
        new_w, new_h = int(w * scale), int(h * scale)
        img_resized = cv2.resize(img, (new_w, new_h))
        # Pad with the conventional YOLO gray value (114), centering the image.
        canvas = np.full((self.input_size, self.input_size, 3), 114, dtype=np.uint8)
        dw, dh = (self.input_size - new_w) // 2, (self.input_size - new_h) // 2
        canvas[dh:dh + new_h, dw:dw + new_w, :] = img_resized
        img_rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
        return np.expand_dims(img_rgb, 0).astype(np.uint8), scale, dw, dh

    def postprocess(self, outputs, scale, dw, dh, conf_thresh=0.25, iou_thresh=0.45):
        """Decode raw model output into detections in original-image coordinates.

        Args:
            outputs: list of model outputs; outputs[0] has shape [1, N, C]
                with rows (cx, cy, w, h, obj_conf, cls_conf).
            scale, dw, dh: letterbox parameters from preprocess().
            conf_thresh: minimum objectness and combined-score threshold.
            iou_thresh: IoU threshold for greedy NMS.

        Returns:
            List of dicts with keys "box" ([x1, y1, x2, y2]), "score",
            and "class_id" (always 0 for this single-class model).
        """
        # Flatten to (N, C).  BUGFIX: the original reshaped with shape[1],
        # which for a [1, N, C] tensor is N (the row count), not the per-row
        # channel count; shape[-1] is correct regardless of leading dims.
        preds = outputs[0].reshape(-1, outputs[0].shape[-1])
        boxes, scores, class_ids = [], [], []
        for p in preds:
            conf = p[4]  # objectness
            if conf < conf_thresh:
                continue
            cls_conf = p[5]  # single-class confidence
            score = conf * cls_conf
            if score < conf_thresh:
                continue
            # Center/size -> corners, then undo the letterbox transform.
            cx, cy, w, h = p[:4]
            x1 = (cx - w / 2 - dw) / scale
            y1 = (cy - h / 2 - dh) / scale
            x2 = (cx + w / 2 - dw) / scale
            y2 = (cy + h / 2 - dh) / scale
            boxes.append([x1, y1, x2, y2])
            scores.append(score)
            class_ids.append(0)  # single class
        if not boxes:
            return []
        boxes = np.array(boxes)
        scores = np.array(scores)
        class_ids = np.array(class_ids)
        # Greedy NMS: repeatedly keep the highest-scoring remaining box and
        # drop every box overlapping it above iou_thresh.
        idxs = np.argsort(scores)[::-1]
        keep = []
        while len(idxs) > 0:
            i = idxs[0]
            keep.append(i)
            if len(idxs) == 1:
                break
            x1, y1, x2, y2 = boxes[i]
            xx1 = np.maximum(x1, boxes[idxs[1:], 0])
            yy1 = np.maximum(y1, boxes[idxs[1:], 1])
            xx2 = np.minimum(x2, boxes[idxs[1:], 2])
            yy2 = np.minimum(y2, boxes[idxs[1:], 3])
            inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
            area_i = (x2 - x1) * (y2 - y1)
            area_j = (boxes[idxs[1:], 2] - boxes[idxs[1:], 0]) * (boxes[idxs[1:], 3] - boxes[idxs[1:], 1])
            # Epsilon guards against division by zero for degenerate boxes.
            iou = inter / (area_i + area_j - inter + 1e-6)
            idxs = idxs[1:][iou < iou_thresh]
        return [
            {"box": boxes[i], "score": scores[i], "class_id": class_ids[i]}
            for i in keep
        ]

    def detect(self, img):
        """Run the full pipeline (preprocess -> NPU inference -> decode)."""
        img_data, scale, dw, dh = self.preprocess(img)
        outputs = self.rknn.inference([img_data])
        return self.postprocess(outputs, scale, dw, dh)

    def release(self):
        """Free the NPU runtime resources held by this detector."""
        self.rknn.release()
if __name__ == "__main__":
    detector = Yolo11Detector(MODEL_PATH)
    cap = cv2.VideoCapture(0)  # can be swapped for an image/video path
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Camera disconnected or stream ended.
                break
            results = detector.detect(frame)
            # Draw every surviving detection onto the frame.
            for r in results:
                x1, y1, x2, y2 = map(int, r["box"])
                cls_id = r["class_id"]
                score = r["score"]
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f"{CLASS_NAMES[cls_id]} {score:.2f}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            cv2.imshow("YOLOv11 Detection", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # BUGFIX: release NPU/camera/GUI resources even if the loop raises;
        # the original leaked them on exception and never destroyed windows.
        detector.release()
        cap.release()
        cv2.destroyAllWindows()