import cv2
import numpy as np
from rknnlite.api import RKNNLite

# ---------------------------
# Configuration
# ---------------------------
ROIS = [(604, 182, 594, 252)]  # (x, y, w, h) regions cropped from the input image
IMG_SIZE = 640                 # side of the square image fed to the network
STRIDES = [8, 16, 32]          # one group of four head outputs is read per stride
OBJ_THRESH = 0.25              # minimum combined score for a detection to be used
MASK_THRESH = 0.5              # threshold applied to the sigmoid of the mask logits

_global_rknn = None            # RKNNLite runtime shared by every inference call


def init_rknn_model(model_path):
    """Load the RKNN model once and cache the runtime in ``_global_rknn``.

    Calling this again after a successful initialisation is a no-op.

    Args:
        model_path: Path of the ``.rknn`` model file.

    Raises:
        RuntimeError: If loading the model or initialising the runtime fails.
    """
    global _global_rknn
    if _global_rknn is not None:
        return

    rknn = RKNNLite(verbose=False)
    ret = rknn.load_rknn(model_path)
    if ret != 0:
        # Do not leak the half-initialised handle on failure.
        rknn.release()
        raise RuntimeError(f"Load RKNN failed: {ret}")
    ret = rknn.init_runtime()
    if ret != 0:
        rknn.release()
        raise RuntimeError(f"Init runtime failed: {ret}")

    _global_rknn = rknn
    print(f"[INFO] RKNN model loaded: {model_path}")


def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
    return 1 / (1 + np.exp(-x))


def dfl_decode(dfl):
    """Turn per-side bin logits into expected distances.

    Args:
        dfl: Array of shape ``(sides, 16)``; each row holds the logits of the
            16 distance bins of one box side.

    Returns:
        1-D array with one expected bin index (0..15, fractional) per row.
    """
    bins = np.arange(16)
    # NOTE(review): weights are sigmoid outputs renormalised to sum to 1, not
    # a softmax. Left as-is; confirm this matches how the model was exported.
    weights = sigmoid(dfl)
    weights /= np.sum(weights, axis=1, keepdims=True)
    return np.sum(weights * bins, axis=1)


def largest_intersect_cc(mask_bin, bbox):
    """Keep only the mask component that overlaps ``bbox`` the most.

    Overlap is measured between the bounding rectangle of each external
    contour and ``bbox``; the first contour with the strictly largest overlap
    wins.

    Args:
        mask_bin: 2-D ``uint8`` binary mask.
        bbox: ``(x1, y1, x2, y2)`` box in the mask's pixel coordinates.

    Returns:
        ``uint8`` mask of the same shape containing the filled winning contour
        as 1s, or all zeros when no contour overlaps the box.
    """
    x1, y1, x2, y2 = bbox
    contours, _ = cv2.findContours(
        mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    best_cnt = None
    max_inter = 0
    for cnt in contours:
        cx, cy, cw, ch = cv2.boundingRect(cnt)
        inter_w = max(0, min(cx + cw, x2) - max(cx, x1))
        inter_h = max(0, min(cy + ch, y2) - max(cy, y1))
        area = inter_w * inter_h
        if area > max_inter:
            max_inter = area
            best_cnt = cnt

    # Draw once for the winner instead of rasterising every candidate.
    best = np.zeros_like(mask_bin, dtype=np.uint8)
    if best_cnt is not None:
        cv2.drawContours(best, [best_cnt], -1, 1, -1)
    return best


def seg_infer(roi):
    """Segment ``roi`` and return the mask of the best-scoring detection.

    The ROI is resized to ``IMG_SIZE`` x ``IMG_SIZE``, its channel order is
    reversed, and it is run through the cached RKNN runtime. For every stride
    the highest-scoring cell is examined; the overall best cell above
    ``OBJ_THRESH`` supplies the mask coefficients and a box used to pick one
    connected component of the thresholded mask.

    Args:
        roi: Image crop of shape ``(H, W, C)``.

    Returns:
        ``uint8`` mask of shape ``(H, W)`` with values 0 or 255; all zeros if
        nothing scores above ``OBJ_THRESH``.

    Raises:
        RuntimeError: If the model is not initialised or inference returns
            no outputs.
    """
    if _global_rknn is None:
        raise RuntimeError("RKNN model not initialized. Call init_rknn_model() first.")

    h0, w0 = roi.shape[:2]
    inp_img = cv2.resize(roi, (IMG_SIZE, IMG_SIZE))
    # Reversing the channel axis yields a negative-stride view; hand the
    # runtime a contiguous buffer instead of relying on it to cope with that.
    inp = np.ascontiguousarray(inp_img[..., ::-1])[None, ...]

    outputs = _global_rknn.inference([inp])
    if outputs is None:
        raise RuntimeError("RKNN inference failed")

    # Four outputs per stride come first; the prototype tensor follows them.
    proto = outputs[4 * len(STRIDES)][0]
    proto_h, proto_w = proto.shape[1:]
    scale = proto_w / IMG_SIZE  # network-input pixels -> prototype pixels

    best_score = -1
    best_coef = None
    best_bbox = None

    for head, stride in enumerate(STRIDES):
        base = 4 * head
        reg = outputs[base][0]
        cls = outputs[base + 1][0, 0]
        obj = outputs[base + 2][0, 0]
        coef = outputs[base + 3][0]

        # NOTE(review): assumes both maps are raw logits; if the exported
        # model already applies sigmoid, the product never drops below 0.25.
        score_map = sigmoid(cls) * sigmoid(obj)
        y, x = np.unravel_index(np.argmax(score_map), score_map.shape)
        score = score_map[y, x]
        if score <= best_score or score <= OBJ_THRESH:
            continue

        best_score = score
        best_coef = coef[:, y, x]

        l, t, r, b = dfl_decode(reg[:, y, x].reshape(4, 16))
        cx = (x + 0.5) * stride
        cy = (y + 0.5) * stride
        # NOTE(review): the decoded distances are subtracted from a pixel-space
        # centre without being multiplied by the stride; confirm their units.
        bx1 = int((cx - l) * scale)
        by1 = int((cy - t) * scale)
        bx2 = int((cx + r) * scale)
        by2 = int((cy + b) * scale)
        best_bbox = (max(0, bx1), max(0, by1), min(proto_w, bx2), min(proto_h, by2))

    if best_coef is None:
        return np.zeros((h0, w0), dtype=np.uint8)

    proto_mask = sigmoid(np.tensordot(best_coef, proto, axes=1)) > MASK_THRESH
    proto_mask = proto_mask.astype(np.uint8)
    mask_final = largest_intersect_cc(proto_mask, best_bbox)

    mask_roi = cv2.resize(mask_final, (w0, h0), interpolation=cv2.INTER_NEAREST) * 255
    return mask_roi.astype(np.uint8)


def _draw_measurements(vis_img, points, diff14, diff43, mask_area, roi_origin):
    """Draw the two measured segments, their lengths, point names and area."""
    p1_g, p4_g, p3_g = points
    rx, ry = roi_origin
    colors = [(255, 0, 0), (0, 255, 0)]
    img_h, img_w = vis_img.shape[:2]

    # Segments
    cv2.line(vis_img, p1_g, p4_g, colors[0], 2)
    cv2.line(vis_img, p4_g, p3_g, colors[1], 2)

    # Segment lengths, placed slightly above each midpoint
    mid14 = ((p1_g[0] + p4_g[0]) // 2, (p1_g[1] + p4_g[1]) // 2 - 10)
    mid43 = ((p4_g[0] + p3_g[0]) // 2, (p4_g[1] + p3_g[1]) // 2 - 10)
    cv2.putText(vis_img, f"{diff14:.1f}", mid14, cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[0], 1)
    cv2.putText(vis_img, f"{diff43:.1f}", mid43, cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[1], 1)

    # Point names, clamped so the text stays inside the image
    labels = ["P1", "P4", "P3"]
    offsets = [(-25, -10), (-25, -10), (10, -10)]
    for lab, pt, off in zip(labels, points, offsets):
        tx = max(10, min(pt[0] + off[0], img_w - 50))
        ty = max(20, min(pt[1] + off[1], img_h - 10))
        cv2.putText(vis_img, lab, (tx, ty), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    # Mask area
    cv2.putText(vis_img, f"Area: {mask_area}", (rx + 10, ry + 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)


# ---------------------------
# Main function: optional visualisation
# ---------------------------
def caculate_yemian_diff(img, visualize=False):
    """Measure two distances on the segmented region inside the configured ROI.

    For each ROI the largest external contour of the segmentation mask is
    taken and three extreme points are located: P1 (minimum x), P4 (minimum y)
    and P3 (maximum x). The result of the first ROI that yields a contour with
    at least 20 points is returned.

    Args:
        img: BGR image, ``np.ndarray`` of shape ``(H, W, 3)``.
        visualize: Whether to also produce an annotated copy of the image.

    Returns:
        If ``visualize`` is false: ``(diff14, diff43, mask_area)`` where
        ``diff14`` is the Euclidean distance P1-P4, ``diff43`` the distance
        P4-P3 (both floats, in pixels) and ``mask_area`` the number of mask
        pixels (int).
        If ``visualize`` is true: the same three values followed by the
        annotated image.
        On failure: ``(0.0, 0.0, 0)`` or ``(0.0, 0.0, 0, copy_of_img)``.

    Raises:
        RuntimeError: If ``init_rknn_model()`` has not been called.
    """
    if _global_rknn is None:
        raise RuntimeError("RKNN model not initialized. Call init_rknn_model() first.")

    vis_img = img.copy() if visualize else None

    for (rx, ry, rw, rh) in ROIS:
        roi = img[ry:ry + rh, rx:rx + rw]
        if roi.size == 0:
            # ROI lies outside the image; resizing an empty crop would raise.
            continue

        mask_full = seg_infer(roi)
        mask_bin = mask_full // 255
        mask_area = int(np.sum(mask_bin))

        if visualize:
            green = np.zeros_like(roi)
            green[mask_bin == 1] = (0, 255, 0)
            vis_img[ry:ry + rh, rx:rx + rw] = cv2.addWeighted(roi, 0.7, green, 0.3, 0)

        contours, _ = cv2.findContours(mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        if not contours:
            continue
        cnt = max(contours, key=cv2.contourArea)
        if len(cnt) < 20:
            continue

        pts_all = cnt.reshape(-1, 2)
        P1 = pts_all[np.argmin(pts_all[:, 0])]  # x_min
        P4 = pts_all[np.argmin(pts_all[:, 1])]  # y_min
        P3 = pts_all[np.argmax(pts_all[:, 0])]  # x_max

        # Full-image coordinates
        P1_g = (int(P1[0] + rx), int(P1[1] + ry))
        P4_g = (int(P4[0] + rx), int(P4[1] + ry))
        P3_g = (int(P3[0] + rx), int(P3[1] + ry))

        diff14 = float(np.linalg.norm(np.array(P1_g) - np.array(P4_g)))
        diff43 = float(np.linalg.norm(np.array(P4_g) - np.array(P3_g)))

        if visualize:
            _draw_measurements(vis_img, [P1_g, P4_g, P3_g], diff14, diff43,
                               mask_area, (rx, ry))
            return diff14, diff43, mask_area, vis_img
        return diff14, diff43, mask_area

    # Detection failed
    if visualize:
        return 0.0, 0.0, 0, img.copy()
    return 0.0, 0.0, 0


# ---------------------------
# Example usage
# ---------------------------
def main():
    """Run the measurement on ``1.png`` with and without visualisation."""
    init_rknn_model("60seg.rknn")
    img = cv2.imread("1.png")
    if img is None:
        raise FileNotFoundError("1.png")

    # Without visualisation
    d14, d43, area = caculate_yemian_diff(img)
    print(f"Without vis: {d14:.2f}, {d43:.2f}, {area}")

    # With visualisation
    d14, d43, area, vis = caculate_yemian_diff(img, visualize=True)
    if not cv2.imwrite("output_vis.png", vis):
        # imwrite reports failure through its return value, not an exception.
        raise OSError("output_vis.png")
    print("With vis: saved to output_vis.png")


if __name__ == "__main__":
    main()