import os
import cv2
import numpy as np
from rknnlite.api import RKNNLite

# ---------------------------
# Configuration
# ---------------------------
# Regions of interest as (x, y, width, height) in full-image pixel coordinates.
ROIS = [
    (604, 182, 594, 252),
]
IMG_SIZE = 640          # side length of the square network input
STRIDES = [8, 16, 32]   # one detection branch per stride
OBJ_THRESH = 0.25       # minimum combined score for a cell to count as a detection
MASK_THRESH = 0.5       # threshold applied to the sigmoid of the mask logits

# Process-wide RKNNLite handle, populated once by init_rknn_model().
_global_rknn = None


# ---------------------------
# Global RKNN loading
# ---------------------------
def init_rknn_model(model_path):
    """Load the RKNN model once and cache the runtime handle module-wide.

    Subsequent calls return the cached handle and ignore ``model_path``.

    Args:
        model_path: Path to the ``.rknn`` model file.

    Returns:
        The initialised ``RKNNLite`` instance.

    Raises:
        RuntimeError: If loading the model or initialising the runtime
            returns a non-zero status.
    """
    global _global_rknn
    if _global_rknn is not None:
        return _global_rknn

    rknn = RKNNLite(verbose=False)

    ret = rknn.load_rknn(model_path)
    if ret != 0:
        # Release the half-initialised handle so it is not leaked.
        rknn.release()
        raise RuntimeError(f"Load RKNN failed: {ret}")

    ret = rknn.init_runtime()
    if ret != 0:
        rknn.release()
        raise RuntimeError(f"Init runtime failed: {ret}")

    _global_rknn = rknn
    print(f"[INFO] RKNN Seg 模型加载成功: {model_path}")
    return rknn


# ---------------------------
# Utility functions
# ---------------------------
def sigmoid(x):
    """Return the element-wise logistic sigmoid of ``x``."""
    return 1 / (1 + np.exp(-x))


def dfl_decode(dfl):
    """Decode Distribution-Focal-Loss logits into expected distances.

    Each row of ``dfl`` holds the logits over the discrete distance bins of
    one box side. The distribution over bins is a softmax (as in the YOLOv8
    DFL head), and the decoded distance is its expectation, expressed in
    bin units.

    Args:
        dfl: Array of shape ``(sides, bins)``.

    Returns:
        1-D array of length ``sides`` with the expected bin index per side.
    """
    logits = np.asarray(dfl, dtype=np.float64)
    bins = np.arange(logits.shape[1], dtype=np.float64)
    # Subtract the row maximum for a numerically stable softmax.
    exp = np.exp(logits - logits.max(axis=1, keepdims=True))
    prob = exp / exp.sum(axis=1, keepdims=True)
    return np.sum(prob * bins, axis=1)


def largest_intersect_cc(mask_bin, bbox):
    """Keep the external contour whose bounding rect overlaps ``bbox`` most.

    Args:
        mask_bin: 2-D ``uint8`` binary mask.
        bbox: ``(x1, y1, x2, y2)`` box in the mask's coordinates.

    Returns:
        ``uint8`` mask of the same shape with the selected contour filled
        with 1, or all zeros when no contour overlaps the box.
    """
    x1, y1, x2, y2 = bbox
    # [-2] selects the contour list on both OpenCV 3.x (image, contours,
    # hierarchy) and 4.x (contours, hierarchy).
    contours = cv2.findContours(
        mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    best_mask = np.zeros_like(mask_bin, dtype=np.uint8)
    best_cnt = None
    max_inter = 0
    for cnt in contours:
        cx, cy, cw, ch = cv2.boundingRect(cnt)
        inter_w = max(0, min(cx + cw, x2) - max(cx, x1))
        inter_h = max(0, min(cy + ch, y2) - max(cy, y1))
        area = inter_w * inter_h
        if area > max_inter:
            max_inter = area
            best_cnt = cnt

    # Rasterise only the winner instead of one full-size mask per contour.
    if best_cnt is not None:
        cv2.drawContours(best_mask, [best_cnt], -1, 1, -1)
    return best_mask


# ---------------------------
# RANSAC line fitting
# ---------------------------
def fit_line_ransac(pts, max_dist=2.5, min_inliers_ratio=0.6, iters=100,
                    rng=None):
    """Fit the line ``x = m * y + b`` to points with a simple RANSAC loop.

    Args:
        pts: ``N x 2`` array of ``[x, y]`` points.
        max_dist: Maximum horizontal distance (in x) for an inlier.
        min_inliers_ratio: Minimum inlier fraction for the fit to be accepted.
        iters: Number of random two-point hypotheses to try.
        rng: Optional object providing ``choice`` (e.g. a
            ``numpy.random.Generator``) for reproducible sampling. Defaults
            to NumPy's global random state.

    Returns:
        ``(m, b)`` of the best hypothesis, or ``None`` when there are fewer
        than 10 points, no valid hypothesis was found, or the inlier ratio
        is below ``min_inliers_ratio``.
    """
    if len(pts) < 10:
        return None

    rng = np.random if rng is None else rng
    pts = np.asarray(pts, dtype=np.float64)
    xs = pts[:, 0]
    ys = pts[:, 1]
    n_pts = len(pts)

    best_m, best_b = None, None
    best_inliers = 0

    for _ in range(iters):
        i, j = rng.choice(n_pts, 2, replace=False)
        dy = ys[j] - ys[i]
        # Two samples on (almost) the same row cannot define x as a
        # function of y.
        if abs(dy) < 1e-3:
            continue

        m = (xs[j] - xs[i]) / dy
        b = xs[i] - m * ys[i]

        cnt = np.sum(np.abs(xs - (m * ys + b)) < max_dist)
        if cnt > best_inliers:
            best_inliers = cnt
            best_m, best_b = m, b

    if best_m is None:
        return None
    if best_inliers / n_pts < min_inliers_ratio:
        return None
    return best_m, best_b


# ---------------------------
# Segmentation inference
# ---------------------------
def seg_infer(roi):
    """Run the segmentation model on ``roi`` and return the best mask.

    The outputs are read as four tensors per stride (box regression, class
    score, objectness/score map, mask coefficients) followed by the mask
    prototypes. Only the single highest-scoring cell across all strides is
    decoded.

    Args:
        roi: BGR image crop of shape ``(H, W, 3)``.

    Returns:
        ``uint8`` mask of shape ``(H, W)`` with values 0 or 255; all zeros
        when the crop is empty or no cell scores above ``OBJ_THRESH``.

    Raises:
        RuntimeError: If the model was not initialised or inference returned
            fewer outputs than expected.
    """
    rknn = _global_rknn
    if rknn is None:
        raise RuntimeError("请先 init_rknn_model()")

    h0, w0 = roi.shape[:2]
    if roi.size == 0:
        # Nothing to resize or infer on (ROI fell outside the image).
        return np.zeros((h0, w0), dtype=np.uint8)

    inp_img = cv2.resize(roi, (IMG_SIZE, IMG_SIZE))
    # BGR -> RGB; cvtColor returns a contiguous buffer, unlike a
    # negative-stride [..., ::-1] view.
    inp = cv2.cvtColor(inp_img, cv2.COLOR_BGR2RGB)[None, ...]

    outputs = rknn.inference([inp])
    proto_idx = 4 * len(STRIDES)  # prototypes follow the per-stride tensors
    if outputs is None or len(outputs) <= proto_idx:
        raise RuntimeError("RKNN inference returned unexpected outputs")

    proto = outputs[proto_idx][0]
    proto_h, proto_w = proto.shape[1:]
    scale_x = proto_w / IMG_SIZE
    scale_y = proto_h / IMG_SIZE

    best_score = -1
    best_coef = None
    best_bbox = None

    for branch, stride in enumerate(STRIDES):
        base = 4 * branch
        reg = outputs[base][0]
        cls = outputs[base + 1][0, 0]
        obj = outputs[base + 2][0, 0]
        coef = outputs[base + 3][0]

        # NOTE(review): if the model was exported with the activation already
        # applied to these maps, the extra sigmoid compresses scores into
        # [0.5, 1) and OBJ_THRESH stops filtering — confirm against the export.
        score_map = sigmoid(cls) * sigmoid(obj)
        y, x = np.unravel_index(np.argmax(score_map), score_map.shape)
        score = score_map[y, x]
        if score <= best_score or score <= OBJ_THRESH:
            continue

        best_score = score
        best_coef = coef[:, y, x]

        # DFL distances are expressed in grid cells of this branch, so they
        # are combined with the cell centre BEFORE scaling by the stride.
        left, top, right, bottom = dfl_decode(reg[:, y, x].reshape(4, -1))
        x1 = int((x + 0.5 - left) * stride * scale_x)
        y1 = int((y + 0.5 - top) * stride * scale_y)
        x2 = int((x + 0.5 + right) * stride * scale_x)
        y2 = int((y + 0.5 + bottom) * stride * scale_y)
        best_bbox = (
            max(0, x1),
            max(0, y1),
            min(proto_w, x2),
            min(proto_h, y2),
        )

    if best_coef is None:
        return np.zeros((h0, w0), dtype=np.uint8)

    # Linear combination of the prototypes with the selected coefficients.
    mask_logits = np.tensordot(best_coef, proto, axes=1)
    proto_mask = (sigmoid(mask_logits) > MASK_THRESH).astype(np.uint8)

    mask_final = largest_intersect_cc(proto_mask, best_bbox)
    mask_roi = cv2.resize(
        mask_final, (w0, h0), interpolation=cv2.INTER_NEAREST
    ) * 255
    return mask_roi.astype(np.uint8)


# ---------------------------
# Post-processing
# ---------------------------
def extract_left_right_edge_points(mask_bin):
    """Collect the leftmost and rightmost foreground pixel of each row.

    Rows with fewer than two foreground pixels are skipped.

    Args:
        mask_bin: 2-D mask; any value > 0 is foreground.

    Returns:
        ``(left_pts, right_pts)``, each an ``N x 2`` integer array of
        ``[x, y]`` points (``0 x 2`` when no row qualifies).
    """
    fg = np.asarray(mask_bin) > 0
    width = fg.shape[1]

    rows = np.flatnonzero(fg.sum(axis=1) >= 2)
    if rows.size == 0:
        # Keep the N x 2 contract even when empty so callers can index columns.
        return (np.empty((0, 2), dtype=np.int64),
                np.empty((0, 2), dtype=np.int64))

    selected = fg[rows]
    left_x = selected.argmax(axis=1)                         # first True per row
    right_x = width - 1 - selected[:, ::-1].argmax(axis=1)   # last True per row

    left_pts = np.stack([left_x, rows], axis=1)
    right_pts = np.stack([right_x, rows], axis=1)
    return left_pts, right_pts


def filter_by_seg_y_ratio(pts, y_start=0.35, y_end=0.85):
    """Keep points whose y lies in a fractional band of the points' y-extent.

    Args:
        pts: ``N x 2`` array of ``[x, y]`` points.
        y_start: Lower bound of the band as a fraction of the y-extent.
        y_end: Upper bound of the band as a fraction of the y-extent.

    Returns:
        The filtered points; ``pts`` unchanged when it has fewer than two
        points or its y-extent is below 10.
    """
    if len(pts) < 2:
        return pts

    ys = pts[:, 1]
    y_min, y_max = ys.min(), ys.max()
    extent = y_max - y_min
    if extent < 10:
        return pts

    lo = y_min + int(extent * y_start)
    hi = y_min + int(extent * y_end)
    return pts[(ys >= lo) & (ys <= hi)]


def get_y_ref(mask_bin):
    """Return the reference row: mean bottom edge over the central columns.

    For every column between 20 % and 80 % of the width that contains
    foreground, the lowest foreground row is taken; the result is the
    truncated mean of those rows, or ``h // 2`` when none has foreground.

    Args:
        mask_bin: 2-D mask; any value > 0 is foreground.

    Returns:
        Integer row index.
    """
    h, w = mask_bin.shape
    band = np.asarray(mask_bin)[:, int(w * 0.2):int(w * 0.8)] > 0

    has_fg = band.any(axis=0)
    if not has_fg.any():
        return h // 2

    # Index of the last True per column == lowest foreground row.
    bottoms = h - 1 - band[::-1].argmax(axis=0)
    return int(np.mean(bottoms[has_fg]))


# ---------------------------
# Per-image computation
# ---------------------------
def caculate_yemian_diff(img, return_vis=True):
    """Measure the horizontal width of the segmented region at its reference row.

    For each ROI: segment, fit straight lines to the left and right mask
    edges, intersect both with the reference row from ``get_y_ref`` and
    report the distance between the intersections. When several ROIs
    succeed, the result of the last one is returned.

    Args:
        img: BGR image.
        return_vis: When true, also render an annotated copy of ``img``.

    Returns:
        ``(result_data, vis)`` where ``result_data`` is
        ``(x_left, y, x_right, y, diff)`` in full-image coordinates or
        ``None`` if no ROI produced both lines, and ``vis`` is the annotated
        image or ``None`` when ``return_vis`` is false.

    Raises:
        RuntimeError: If ``init_rknn_model()`` has not been called.
    """
    if _global_rknn is None:
        raise RuntimeError("请先 init_rknn_model()")

    vis = img.copy() if return_vis else None
    result_data = None

    for rx, ry, rw, rh in ROIS:
        roi = img[ry:ry + rh, rx:rx + rw]
        if roi.size == 0:
            # ROI lies completely outside this image; nothing to measure.
            continue

        mask_bin = seg_infer(roi) // 255

        if return_vis:
            green = np.zeros_like(roi)
            green[mask_bin == 1] = (0, 255, 0)
            vis[ry:ry + rh, rx:rx + rw] = cv2.addWeighted(
                roi, 0.7, green, 0.3, 0
            )

        left_pts, right_pts = extract_left_right_edge_points(mask_bin)
        left_line = fit_line_ransac(filter_by_seg_y_ratio(left_pts))
        right_line = fit_line_ransac(filter_by_seg_y_ratio(right_pts))
        if left_line is None or right_line is None:
            continue

        m1, b1 = left_line
        m2, b2 = right_line

        y_ref = get_y_ref(mask_bin)
        x_left = int(m1 * y_ref + b1)
        x_right = int(m2 * y_ref + b2)

        # Convert ROI-local coordinates to full-image coordinates.
        X_L, X_R, Y = rx + x_left, rx + x_right, ry + y_ref
        diff = X_R - X_L
        result_data = (X_L, Y, X_R, Y, diff)

        if return_vis:
            # Slice view: drawing on it writes straight into ``vis``.
            roi_vis = vis[ry:ry + rh, rx:rx + rw]
            cv2.line(roi_vis, (int(b1), 0), (int(m1 * rh + b1), rh),
                     (0, 0, 255), 3)
            cv2.line(roi_vis, (int(b2), 0), (int(m2 * rh + b2), rh),
                     (255, 0, 0), 3)
            cv2.line(roi_vis, (0, y_ref), (rw, y_ref), (0, 255, 255), 2)
            cv2.circle(roi_vis, (x_left, y_ref), 6, (0, 0, 255), -1)
            cv2.circle(roi_vis, (x_right, y_ref), 6, (255, 0, 0), -1)
            cv2.putText(
                roi_vis, f"diff={diff}px", (10, 40),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2
            )

    return result_data, vis


# ---------------------------
# main
# ---------------------------
def main():
    """Run the measurement on a single image and save the visualisation."""
    RKNN_MODEL_PATH = "seg700.rknn"
    IMAGE_PATH = "7.png"

    init_rknn_model(RKNN_MODEL_PATH)

    img = cv2.imread(IMAGE_PATH)
    if img is None:
        raise FileNotFoundError(IMAGE_PATH)

    result_data, vis_img = caculate_yemian_diff(img, return_vis=True)

    if result_data is not None:
        XL, YL, XR, YR, diff = result_data
        print(f"左交点: ({XL},{YL}) 右交点: ({XR},{YR}) diff={diff}px")

    if vis_img is not None:
        cv2.imwrite("vis_output.png", vis_img)
        print("可视化结果保存到 vis_output.png")


if __name__ == "__main__":
    main()