import os import cv2 import numpy as np from rknnlite.api import RKNNLite # ====================== 配置 ====================== MODEL_PATH = "bag3588.rknn" # RKNN 模型路径 IMG_PATH = "2.jpg" # 待推理图片路径 IMG_SIZE = (640, 640) # 模型输入尺寸 (w,h) OBJ_THRESH = 0.001 # 目标置信度阈值 NMS_THRESH = 0.45 # NMS 阈值 CLASS_NAME = ["bag"] OUTPUT_DIR = "./result" os.makedirs(OUTPUT_DIR, exist_ok=True) # ====================== 工具函数 ====================== def letterbox_resize(image, size, bg_color=114): target_w, target_h = size h, w = image.shape[:2] scale = min(target_w / w, target_h / h) new_w, new_h = int(w * scale), int(h * scale) resized = cv2.resize(image, (new_w, new_h)) canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8) dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2 canvas[dy:dy + new_h, dx:dx + new_w] = resized return canvas, scale, dx, dy def dfl_numpy(position): n, c, h, w = position.shape p_num = 4 mc = c // p_num y = position.reshape(n, p_num, mc, h, w) y = np.exp(y) / np.sum(np.exp(y), axis=2, keepdims=True) acc = np.arange(mc).reshape(1,1,mc,1,1) y = np.sum(y * acc, axis=2) return y def box_process(position): grid_h, grid_w = position.shape[2:4] col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h)) col = col.reshape(1,1,grid_h,grid_w) row = row.reshape(1,1,grid_h,grid_w) grid = np.concatenate((col,row), axis=1) stride = np.array([IMG_SIZE[1] // grid_h, IMG_SIZE[0] // grid_w]).reshape(1,2,1,1) position = dfl_numpy(position) box_xy = grid + 0.5 - position[:,0:2,:,:] box_xy2 = grid + 0.5 + position[:,2:4,:,:] xyxy = np.concatenate((box_xy*stride, box_xy2*stride), axis=1) return xyxy def filter_boxes(boxes, box_confidences, box_class_probs): boxes = np.array(boxes).reshape(-1, 4) box_confidences = np.array(box_confidences).reshape(-1) box_class_probs = np.array(box_class_probs) class_ids = np.argmax(box_class_probs, axis=-1) class_scores = box_class_probs[np.arange(len(class_ids)), class_ids] scores = box_confidences * class_scores mask = scores >= OBJ_THRESH if np.sum(mask) == 0: return None, None, None, None boxes = boxes[mask] classes = class_ids[mask] scores = scores[mask] conf_keep = box_confidences[mask] # 原始 objectness # NMS x1, y1, x2, y2 = boxes[:,0], boxes[:,1], boxes[:,2], boxes[:,3] areas = (x2 - x1 + 1) * (y2 - y1 + 1) order = scores.argsort()[::-1] keep = [] while order.size > 0: i = order[0] keep.append(i) xx1 = np.maximum(x1[i], x1[order[1:]]) yy1 = np.maximum(y1[i], y1[order[1:]]) xx2 = np.minimum(x2[i], x2[order[1:]]) yy2 = np.minimum(y2[i], y2[order[1:]]) w = np.maximum(0, xx2 - xx1 + 1) h = np.maximum(0, yy2 - yy1 + 1) inter = w * h ovr = inter / (areas[i] + areas[order[1:]] - inter) inds = np.where(ovr <= NMS_THRESH)[0] order = order[inds + 1] return boxes[keep], classes[keep], scores[keep], conf_keep[keep] def post_process(outputs, scale, dx, dy): boxes_list, conf_list, class_list = [], [], [] branch_num = 3 for i in range(branch_num): boxes_list.append(box_process(outputs[i*3])) conf_list.append(outputs[i*3+2]) class_list.append(outputs[i*3+1]) def flatten(x): ch = x.shape[1] x = x.transpose(0,2,3,1) return x.reshape(-1,ch) boxes = np.concatenate([flatten(b) for b in boxes_list]) box_conf = np.concatenate([flatten(c) for c in conf_list]) class_probs = np.concatenate([flatten(c) for c in class_list]) boxes, classes, scores, conf_keep = filter_boxes(boxes, box_conf, class_probs) if boxes is None: return None, None, None, None boxes[:, [0,2]] -= dx boxes[:, [1,3]] -= dy boxes /= scale boxes = boxes.clip(min=0) # 将 objectness 置信度放大 255 scores = 1-scores conf_keep = conf_keep * 255 return boxes, classes, scores, conf_keep # ====================== 单张图片推理 ====================== def detect_single_image(img_path): rknn = RKNNLite(verbose=False) rknn.load_rknn(MODEL_PATH) rknn.init_runtime() img_name = os.path.basename(img_path) img = cv2.imread(img_path) if img is None: raise FileNotFoundError(f"图片无法读取: {img_path}") img_resized, scale, dx, dy = letterbox_resize(img, IMG_SIZE) input_data = np.expand_dims(img_resized, 0) outputs = rknn.inference(inputs=[input_data]) boxes, classes, scores, conf_keep = post_process(outputs, scale, dx, dy) if boxes is not None: for i, box in enumerate(boxes): x1, y1, x2, y2 = box.astype(int) cls_id = classes[i] score = scores[i] cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2) cv2.putText(img, f"{CLASS_NAME[cls_id]}:{score:.1f}", (x1, max(y1-5,0)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2) # 保存图像 if conf_keep is not None and len(conf_keep) > 0: score_strs = ["{:.0f}".format(s) for s in conf_keep] name_root, ext = os.path.splitext(img_name) new_name = name_root + "_conf_" + "_".join(score_strs) + ext else: new_name = img_name save_path = os.path.join(OUTPUT_DIR, new_name) cv2.imwrite(save_path, img) print(f"{img_name} 推理完成,结果保存到: {save_path}") rknn.release() # ====================== 调用 ====================== detect_single_image(IMG_PATH)