"""RKNN-Lite segmentation inference for liquid-level ("yemian") width measurement.

Runs a YOLO-style instance-segmentation model on a fixed ROI, extracts the
left/right edges of the segmented region, fits a line to each edge, and
reports the horizontal distance (``diff``) between them at a reference row.
"""

import os
import cv2
import numpy as np
from rknnlite.api import RKNNLite

# ---------------------------
# Configuration
# ---------------------------
# Regions of interest as (x, y, w, h) in full-image pixel coordinates.
ROIS = [
    (445, 540, 931, 319),
]
IMG_SIZE = 640            # model input resolution (square)
STRIDES = [8, 16, 32]     # detection-head strides, in output-tensor order
OBJ_THRESH = 0.25         # minimum cls*obj score to accept a detection
MASK_THRESH = 0.5         # sigmoid threshold for binarizing the proto mask

# Lazily-initialized module-level RKNN runtime (loaded once per process).
_global_rknn = None


# ---------------------------
# Global RKNN model loading
# ---------------------------
def init_rknn_model(model_path):
    """Load the RKNN model once and cache it in ``_global_rknn``.

    Subsequent calls return the cached runtime without reloading.

    Raises:
        RuntimeError: if loading the model or initializing the runtime fails.
    """
    global _global_rknn
    if _global_rknn is not None:
        return _global_rknn
    rknn = RKNNLite(verbose=False)
    ret = rknn.load_rknn(model_path)
    if ret != 0:
        raise RuntimeError(f"Load RKNN failed: {ret}")
    ret = rknn.init_runtime()
    if ret != 0:
        raise RuntimeError(f"Init runtime failed: {ret}")
    _global_rknn = rknn
    print(f"[INFO] RKNN Seg 模型加载成功: {model_path}")
    return rknn


# ---------------------------
# Utility functions
# ---------------------------
def sigmoid(x):
    """Element-wise logistic sigmoid."""
    return 1 / (1 + np.exp(-x))


def dfl_decode(dfl):
    """Decode a (4, 16) DFL distribution into 4 scalar distances (l, t, r, b).

    NOTE(review): this normalizes sigmoid outputs instead of using a softmax
    over the 16 bins, which is the usual DFL formulation — presumably this
    matches how the model was exported; confirm against the export script.
    """
    bins = np.arange(16)
    dfl = sigmoid(dfl)
    dfl /= np.sum(dfl, axis=1, keepdims=True)
    # Expected value of the per-side bin distribution.
    return np.sum(dfl * bins, axis=1)


def largest_intersect_cc(mask_bin, bbox):
    """Return the connected component of ``mask_bin`` whose bounding rect
    overlaps ``bbox`` (x1, y1, x2, y2) with the largest intersection area.

    Returns an all-zero mask of the same shape when no contour exists.
    """
    x1, y1, x2, y2 = bbox
    contours, _ = cv2.findContours(mask_bin, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        return np.zeros_like(mask_bin, dtype=np.uint8)
    max_inter_area = 0
    mask_final = np.zeros_like(mask_bin, dtype=np.uint8)
    for cnt in contours:
        cnt_mask = np.zeros_like(mask_bin, dtype=np.uint8)
        cv2.drawContours(cnt_mask, [cnt], -1, 1, -1)
        cx, cy, cw, ch = cv2.boundingRect(cnt)
        cx2, cy2 = cx + cw, cy + ch
        # Axis-aligned intersection of the contour's bounding rect and bbox.
        inter_x1 = max(cx, x1)
        inter_y1 = max(cy, y1)
        inter_x2 = min(cx2, x2)
        inter_y2 = min(cy2, y2)
        inter_area = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)
        if inter_area > max_inter_area:
            max_inter_area = inter_area
            mask_final = cnt_mask
    return mask_final


# ---------------------------
# Segmentation inference
# ---------------------------
def seg_infer(roi):
    """Run segmentation on a BGR ROI and return a uint8 mask (0/255) of the
    same height/width as ``roi``.

    Assumes the model emits 13 outputs: 4 tensors (reg, cls, obj, mask-coef)
    per stride in STRIDES order, followed by the mask prototype at index 12
    — TODO confirm against the exported model's output layout.
    """
    rknn = _global_rknn
    h0, w0 = roi.shape[:2]
    inp_img = cv2.resize(roi, (IMG_SIZE, IMG_SIZE))
    # BGR -> RGB, add batch dim. FIX: the reversed slice yields a
    # negative-stride view; make it contiguous before handing it to the
    # RKNN runtime, which expects a contiguous buffer.
    inp = np.ascontiguousarray(inp_img[..., ::-1][None, ...])
    outputs = rknn.inference([inp])
    proto = outputs[12][0]                 # (C, proto_h, proto_w) prototypes
    proto_h, proto_w = proto.shape[1:]

    best_score = -1
    best_coef = None
    best_bbox = None
    out_i = 0
    for stride in STRIDES:
        reg = outputs[out_i][0]            # (64, H, W) DFL regression
        cls = outputs[out_i + 1][0, 0]     # (H, W) class logits
        obj = outputs[out_i + 2][0, 0]     # (H, W) objectness logits
        coef = outputs[out_i + 3][0]       # (C, H, W) mask coefficients
        out_i += 4
        score_map = sigmoid(cls) * sigmoid(obj)
        # Single best cell per stride (single-object assumption).
        y, x = np.unravel_index(np.argmax(score_map), score_map.shape)
        score = score_map[y, x]
        if score > best_score and score > OBJ_THRESH:
            best_score = score
            best_coef = coef[:, y, x]
            dfl = reg[:, y, x].reshape(4, 16)
            l, t, r, b = dfl_decode(dfl)
            cx = (x + 0.5) * stride
            cy = (y + 0.5) * stride
            # Map the box from input coords to prototype-mask coords
            # (assumes a square prototype, hence a single scale factor).
            scale = proto_w / IMG_SIZE
            x1 = int((cx - l) * scale)
            y1 = int((cy - t) * scale)
            x2 = int((cx + r) * scale)
            y2 = int((cy + b) * scale)
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(proto_w, x2), min(proto_h, y2)
            best_bbox = (x1, y1, x2, y2)

    if best_coef is None:
        # Nothing above threshold: empty mask.
        return np.zeros((h0, w0), dtype=np.uint8)

    # Linear combination of prototypes -> sigmoid -> binary mask.
    proto_mask = sigmoid(np.tensordot(best_coef, proto, axes=1)) > MASK_THRESH
    proto_mask = proto_mask.astype(np.uint8)
    # Keep only the connected component that best matches the detected box.
    mask_final = largest_intersect_cc(proto_mask, best_bbox)
    mask_roi = cv2.resize(mask_final, (w0, h0),
                          interpolation=cv2.INTER_NEAREST) * 255
    return mask_roi.astype(np.uint8)


# ---------------------------
# Post-processing (CPU)
# ---------------------------
def extract_left_right_edge_points(mask_bin):
    """For each row of a binary mask, collect the leftmost and rightmost
    foreground pixels. Returns two (N, 2) arrays of [x, y] points."""
    h, w = mask_bin.shape
    left_pts, right_pts = [], []
    for y in range(h):
        xs = np.where(mask_bin[y] > 0)[0]
        if len(xs) >= 2:
            left_pts.append([xs.min(), y])
            right_pts.append([xs.max(), y])
    return np.array(left_pts), np.array(right_pts)


def filter_by_seg_y_ratio(pts, y_start=0.35, y_end=0.85):
    """Keep only points whose y lies within [y_start, y_end] of the mask's
    vertical extent, discarding the noisy top/bottom edges. Returns the
    input unchanged when there are too few points or the extent is tiny."""
    if len(pts) < 2:
        return pts
    y_min, y_max = pts[:, 1].min(), pts[:, 1].max()
    h = y_max - y_min
    if h < 10:
        return pts
    y0 = y_min + int(h * y_start)
    y1 = y_min + int(h * y_end)
    return pts[(pts[:, 1] >= y0) & (pts[:, 1] <= y1)]


def fit_line(pts):
    """Least-squares fit x = m*y + b (x as a function of y, which is robust
    for near-vertical edges). Returns (m, b) or None with < 2 points."""
    if len(pts) < 2:
        return None
    m, b = np.polyfit(pts[:, 1], pts[:, 0], 1)
    return m, b


def get_y_ref(mask_bin):
    """Reference row: mean of the bottom-most foreground y over the central
    20%-80% columns; falls back to the mask's mid-height when empty."""
    h, w = mask_bin.shape
    ys = []
    for x in range(int(w * 0.2), int(w * 0.8)):
        y = np.where(mask_bin[:, x] > 0)[0]
        if len(y):
            ys.append(y.max())
    return int(np.mean(ys)) if ys else h // 2


# ---------------------------
# Per-image measurement
# ---------------------------
def caculate_yemian_diff(img, return_vis=True):
    """Measure the horizontal width of the segmented region in each ROI.

    Args:
        img: full BGR image.
        return_vis: when True, also build an annotated copy of the image.

    Returns:
        (result_data, vis) where result_data is (X_L, Y, X_R, Y, diff) in
        full-image coordinates (or None when no ROI yielded a fit), and vis
        is the annotated image or None.

    Raises:
        RuntimeError: if init_rknn_model() was never called.
    """
    if _global_rknn is None:
        raise RuntimeError("请先 init_rknn_model() 加载 RKNN 模型")

    vis = img.copy() if return_vis else None
    result_data = None

    for rx, ry, rw, rh in ROIS:
        roi = img[ry:ry + rh, rx:rx + rw]
        mask_bin = seg_infer(roi) // 255
        if return_vis:
            # Blend a green overlay where the mask is set.
            green = np.zeros_like(roi)
            green[mask_bin == 1] = (0, 255, 0)
            vis[ry:ry + rh, rx:rx + rw] = cv2.addWeighted(roi, 0.7, green, 0.3, 0)

        # Edge extraction, vertical-band filtering, and line fitting.
        left_pts, right_pts = extract_left_right_edge_points(mask_bin)
        left_pts = filter_by_seg_y_ratio(left_pts)
        right_pts = filter_by_seg_y_ratio(right_pts)
        left_line = fit_line(left_pts)
        right_line = fit_line(right_pts)
        if left_line is None or right_line is None:
            continue
        m1, b1 = left_line
        m2, b2 = right_line

        # Intersect both edge lines with the reference row.
        y_ref = get_y_ref(mask_bin)
        x_left = int(m1 * y_ref + b1)
        x_right = int(m2 * y_ref + b2)
        X_L, X_R, Y = rx + x_left, rx + x_right, ry + y_ref
        diff = X_R - X_L
        result_data = (X_L, Y, X_R, Y, diff)

        if return_vis:
            roi_vis = vis[ry:ry + rh, rx:rx + rw]
            for (m, b), c in [((m1, b1), (0, 0, 255)), ((m2, b2), (255, 0, 0))]:
                cv2.line(roi_vis, (int(m * 0 + b), 0), (int(m * rh + b), rh), c, 3)
            cv2.line(roi_vis, (0, y_ref), (rw, y_ref), (0, 255, 255), 2)
            cv2.circle(roi_vis, (x_left, y_ref), 6, (0, 0, 255), -1)
            cv2.circle(roi_vis, (x_right, y_ref), 6, (255, 0, 0), -1)
            cv2.putText(roi_vis, f"diff={diff}px", (10, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

    return result_data, vis


# ---------------------------
# Standalone test entry point
# ---------------------------
if __name__ == "__main__":
    RKNN_MODEL_PATH = "61seg.rknn"
    IMAGE_PATH = "./test_image/33.png"
    init_rknn_model(RKNN_MODEL_PATH)
    img = cv2.imread(IMAGE_PATH)
    if img is None:
        raise FileNotFoundError(f"无法读取图片: {IMAGE_PATH}")
    result_data, vis_img = caculate_yemian_diff(img, return_vis=True)
    if result_data:
        XL, YL, XR, YR, diff = result_data
        print(f"左交点: ({XL},{YL}), 右交点: ({XR},{YR}), diff={diff}px")
    if vis_img is not None:
        cv2.imwrite("vis_output.png", vis_img)
        print("可视化结果保存到 vis_output.png")