import cv2 import numpy as np from rknnlite.api import RKNNLite # ====================== # 配置 # ====================== IMAGE_PATH = "3.png" MODEL_PATH = "segr.rknn" OUT_OVERLAY = "result_overlay.jpg" DEBUG_INPUT = "debug_input_roi.png" DEBUG_PROTO = "debug_proto_mask.png" DEBUG_INST_PROTO = "debug_inst_proto.png" IMG_SIZE = 640 OBJ_THRESH = 0.25 MASK_THRESH = 0.5 STRIDES = [8, 16, 32] ROIS = [ (670, 623, 465, 178), ] # ====================== # 工具函数 # ====================== def sigmoid(x): return 1 / (1 + np.exp(-x)) def resize_to_640(img): """seg 专用:禁止 letterbox,直接 resize""" return cv2.resize(img, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_LINEAR) def dfl_decode(dfl): bins = np.arange(16) dfl = sigmoid(dfl) dfl /= np.sum(dfl, axis=1, keepdims=True) return np.sum(dfl * bins, axis=1) def largest_cc(mask): num, labels = cv2.connectedComponents(mask.astype(np.uint8)) if num <= 1: return mask areas = [(labels == i).sum() for i in range(1, num)] return (labels == (np.argmax(areas) + 1)).astype(np.uint8) # ====================== # 单 ROI 推理(完整语义 mask) # ====================== def infer_single_roi(rknn, roi): h0, w0 = roi.shape[:2] # ---------- 1️⃣ 正确的 seg 输入 ---------- inp_img = resize_to_640(roi) cv2.imwrite(DEBUG_INPUT, inp_img) inp = inp_img[..., ::-1][None, ...] outputs = rknn.inference([inp]) # ---------- 2️⃣ proto ---------- proto = outputs[12][0] # (32,160,160) best_score = -1 best_coef = None out_i = 0 for stride in STRIDES: reg = outputs[out_i][0] cls = outputs[out_i + 1][0, 0] obj = outputs[out_i + 2][0, 0] coef = outputs[out_i + 3][0] out_i += 4 score_map = sigmoid(cls) * sigmoid(obj) y, x = np.unravel_index(np.argmax(score_map), score_map.shape) score = score_map[y, x] if score < OBJ_THRESH or score <= best_score: continue best_score = score best_coef = coef[:, y, x] if best_coef is None: return None # ---------- 3️⃣ proto_mask(完整) ---------- proto_mask = sigmoid(np.tensordot(best_coef, proto, axes=1)) # (160,160) pm = (proto_mask - proto_mask.min()) / (proto_mask.max() - proto_mask.min() + 1e-6) cv2.imwrite(DEBUG_PROTO, (pm * 255).astype(np.uint8)) # ---------- 4️⃣ 二值化 + 最大连通域(不裁!) ---------- inst_proto = (proto_mask > MASK_THRESH).astype(np.uint8) inst_proto = largest_cc(inst_proto) cv2.imwrite(DEBUG_INST_PROTO, inst_proto * 255) # ---------- 5️⃣ proto → ROI ---------- inst_roi = cv2.resize( inst_proto, (w0, h0), interpolation=cv2.INTER_NEAREST ) return inst_roi * 255 # ====================== # 主程序 # ====================== def main(): img = cv2.imread(IMAGE_PATH) overlay = img.copy() rknn = RKNNLite() rknn.load_rknn(MODEL_PATH) rknn.init_runtime() for (x, y, w, h) in ROIS: roi = img[y:y + h, x:x + w] mask = infer_single_roi(rknn, roi) if mask is None: continue color = np.zeros_like(roi) color[mask == 255] = (0, 255, 0) overlay[y:y + h, x:x + w] = cv2.addWeighted( roi, 0.7, color, 0.3, 0 ) rknn.release() cv2.imwrite(OUT_OVERLAY, overlay) print("✅ 完成:", OUT_OVERLAY) if __name__ == "__main__": main()