diff --git a/yemian_seg_diff_60/test_image/1.png b/yemian_seg_diff-area_60/1.png similarity index 100% rename from yemian_seg_diff_60/test_image/1.png rename to yemian_seg_diff-area_60/1.png diff --git a/yemian_seg_diff_60/test_image/2.png b/yemian_seg_diff-area_60/2.png similarity index 100% rename from yemian_seg_diff_60/test_image/2.png rename to yemian_seg_diff-area_60/2.png diff --git a/yemian_seg_diff_60/test_image/3.png b/yemian_seg_diff-area_60/3.png similarity index 100% rename from yemian_seg_diff_60/test_image/3.png rename to yemian_seg_diff-area_60/3.png diff --git a/yemian_seg_diff-area_60/4.png b/yemian_seg_diff-area_60/4.png new file mode 100644 index 0000000..c56b9f9 Binary files /dev/null and b/yemian_seg_diff-area_60/4.png differ diff --git a/yemian_seg_diff_60/60seg.rknn b/yemian_seg_diff-area_60/60seg.rknn similarity index 100% rename from yemian_seg_diff_60/60seg.rknn rename to yemian_seg_diff-area_60/60seg.rknn diff --git a/yemian_seg_diff-area_60/60seg_diff_area.py b/yemian_seg_diff-area_60/60seg_diff_area.py new file mode 100644 index 0000000..6e425cd --- /dev/null +++ b/yemian_seg_diff-area_60/60seg_diff_area.py @@ -0,0 +1,215 @@ +import cv2 +import numpy as np +from rknnlite.api import RKNNLite + +# --------------------------- +# 配置 +# --------------------------- +ROIS = [(604, 182, 594, 252)] # (x, y, w, h) + +IMG_SIZE = 640 +STRIDES = [8, 16, 32] +OBJ_THRESH = 0.25 +MASK_THRESH = 0.5 + +_global_rknn = None + + +def init_rknn_model(model_path): + global _global_rknn + if _global_rknn is not None: + return + rknn = RKNNLite(verbose=False) + ret = rknn.load_rknn(model_path) + if ret != 0: + raise RuntimeError(f"Load RKNN failed: {ret}") + ret = rknn.init_runtime() + if ret != 0: + raise RuntimeError(f"Init runtime failed: {ret}") + _global_rknn = rknn + print(f"[INFO] RKNN model loaded: {model_path}") + + +def sigmoid(x): + return 1 / (1 + np.exp(-x)) + + +def dfl_decode(dfl): + bins = np.arange(16) + dfl = sigmoid(dfl) + dfl /= np.sum(dfl, axis=1, keepdims=True) + return np.sum(dfl * bins, axis=1) + + +def largest_intersect_cc(mask_bin, bbox): + x1, y1, x2, y2 = bbox + contours, _ = cv2.findContours(mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + if not contours: + return np.zeros_like(mask_bin, dtype=np.uint8) + max_inter = 0 + best = np.zeros_like(mask_bin, dtype=np.uint8) + for cnt in contours: + tmp = np.zeros_like(mask_bin, dtype=np.uint8) + cv2.drawContours(tmp, [cnt], -1, 1, -1) + cx, cy, cw, ch = cv2.boundingRect(cnt) + ix1 = max(cx, x1) + iy1 = max(cy, y1) + ix2 = min(cx + cw, x2) + iy2 = min(cy + ch, y2) + area = max(0, ix2 - ix1) * max(0, iy2 - iy1) + if area > max_inter: + max_inter = area + best = tmp + return best + + +def seg_infer(roi): + rknn = _global_rknn + h0, w0 = roi.shape[:2] + inp_img = cv2.resize(roi, (IMG_SIZE, IMG_SIZE)) + inp = inp_img[..., ::-1][None, ...] + outputs = rknn.inference([inp]) + proto = outputs[12][0] + proto_h, proto_w = proto.shape[1:] + best_score = -1 + best_coef = None + best_bbox = None + out_i = 0 + for stride in STRIDES: + reg = outputs[out_i][0] + cls = outputs[out_i + 1][0, 0] + obj = outputs[out_i + 2][0, 0] + coef = outputs[out_i + 3][0] + out_i += 4 + score_map = sigmoid(cls) * sigmoid(obj) + y, x = np.unravel_index(np.argmax(score_map), score_map.shape) + score = score_map[y, x] + if score > best_score and score > OBJ_THRESH: + best_score = score + best_coef = coef[:, y, x] + dfl = reg[:, y, x].reshape(4, 16) + l, t, r, b = dfl_decode(dfl) + cx = (x + 0.5) * stride + cy = (y + 0.5) * stride + scale = proto_w / IMG_SIZE + x1 = int((cx - l) * scale) + y1 = int((cy - t) * scale) + x2 = int((cx + r) * scale) + y2 = int((cy + b) * scale) + best_bbox = (max(0, x1), max(0, y1), min(proto_w, x2), min(proto_h, y2)) + if best_coef is None: + return np.zeros((h0, w0), dtype=np.uint8) + proto_mask = sigmoid(np.tensordot(best_coef, proto, axes=1)) > MASK_THRESH + proto_mask = proto_mask.astype(np.uint8) + mask_final = largest_intersect_cc(proto_mask, best_bbox) + mask_roi = cv2.resize(mask_final, (w0, h0), interpolation=cv2.INTER_NEAREST) * 255 + return mask_roi.astype(np.uint8) + + +# --------------------------- +# 主函数:支持可选可视化 +# --------------------------- +def caculate_yemian_diff(img, visualize=False): + """ + 输入: + img: BGR 图像 (H, W, 3) np.ndarray + visualize: bool, 是否生成可视化结果 + + 输出: + 若 visualize=False: + (diff14: float, diff43: float, mask_area: int) + 若 visualize=True: + (diff14: float, diff43: float, mask_area: int, vis_img: np.ndarray) + + 失败时返回 (0.0, 0.0, 0) 或 (0.0, 0.0, 0, original_img) + """ + if _global_rknn is None: + raise RuntimeError("RKNN model not initialized. Call init_rknn_model() first.") + + vis_img = img.copy() if visualize else None + + for (rx, ry, rw, rh) in ROIS: + roi = img[ry:ry + rh, rx:rx + rw] + mask_full = seg_infer(roi) + mask_bin = mask_full // 255 + mask_area = int(np.sum(mask_bin)) + + if visualize: + green = np.zeros_like(roi) + green[mask_bin == 1] = (0, 255, 0) + vis_img[ry:ry + rh, rx:rx + rw] = cv2.addWeighted(roi, 0.7, green, 0.3, 0) + + contours, _ = cv2.findContours(mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + if not contours: + continue + cnt = max(contours, key=cv2.contourArea) + if len(cnt) < 20: + continue + + pts_all = cnt.reshape(-1, 2) + P1 = pts_all[np.argmin(pts_all[:, 0])] # x_min + P4 = pts_all[np.argmin(pts_all[:, 1])] # y_min + P3 = pts_all[np.argmax(pts_all[:, 0])] # x_max + + # 全局坐标 + P1_g = (int(P1[0] + rx), int(P1[1] + ry)) + P4_g = (int(P4[0] + rx), int(P4[1] + ry)) + P3_g = (int(P3[0] + rx), int(P3[1] + ry)) + + diff14 = float(np.linalg.norm(np.array(P1_g) - np.array(P4_g))) + diff43 = float(np.linalg.norm(np.array(P4_g) - np.array(P3_g))) + + if visualize: + roi_vis = vis_img[ry:ry + rh, rx:rx + rw] + local_pts = [P1.astype(int), P4.astype(int), P3.astype(int)] + colors = [(255, 0, 0), (0, 255, 0)] + lengths = [diff14, diff43] + # 画线 + cv2.line(vis_img, P1_g, P4_g, colors[0], 2) + cv2.line(vis_img, P4_g, P3_g, colors[1], 2) + # 标长度 + mid14 = ((P1_g[0] + P4_g[0]) // 2, (P1_g[1] + P4_g[1]) // 2 - 10) + mid43 = ((P4_g[0] + P3_g[0]) // 2, (P4_g[1] + P3_g[1]) // 2 - 10) + cv2.putText(vis_img, f"{diff14:.1f}", mid14, cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[0], 1) + cv2.putText(vis_img, f"{diff43:.1f}", mid43, cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[1], 1) + # 标点名 + labels = ["P1", "P4", "P3"] + points = [P1_g, P4_g, P3_g] + offsets = [(-25, -10), (-25, -10), (10, -10)] + for lab, pt, off in zip(labels, points, offsets): + x = max(10, min(pt[0] + off[0], img.shape[1] - 50)) + y = max(20, min(pt[1] + off[1], img.shape[0] - 10)) + cv2.putText(vis_img, lab, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) + # 面积 + cv2.putText(vis_img, f"Area: {mask_area}", (rx + 10, ry + 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + + if visualize: + return diff14, diff43, mask_area, vis_img + else: + return diff14, diff43, mask_area + + # 检测失败 + if visualize: + return 0.0, 0.0, 0, img.copy() + else: + return 0.0, 0.0, 0 + + +# --------------------------- +# 示例用法 +# --------------------------- +if __name__ == "__main__": + init_rknn_model("60seg.rknn") + img = cv2.imread("1.png") + if img is None: + raise FileNotFoundError("1.png") + + # 不可视化 + d14, d43, area = caculate_yemian_diff(img) + print(f"Without vis: {d14:.2f}, {d43:.2f}, {area}") + + # 可视化 + d14, d43, area, vis = caculate_yemian_diff(img, visualize=True) + cv2.imwrite("output_vis.png", vis) + print(f"With vis: saved to output_vis.png") \ No newline at end of file diff --git a/yemian_seg_diff-area_60/test_image/1.png b/yemian_seg_diff-area_60/test_image/1.png new file mode 100644 index 0000000..debb19b Binary files /dev/null and b/yemian_seg_diff-area_60/test_image/1.png differ diff --git a/yemian_seg_diff-area_60/test_image/2.png b/yemian_seg_diff-area_60/test_image/2.png new file mode 100644 index 0000000..e6fc403 Binary files /dev/null and b/yemian_seg_diff-area_60/test_image/2.png differ diff --git a/yemian_seg_diff-area_60/test_image/3.png b/yemian_seg_diff-area_60/test_image/3.png new file mode 100644 index 0000000..5e6e1c1 Binary files /dev/null and b/yemian_seg_diff-area_60/test_image/3.png differ diff --git a/yemian_seg_diff-area_60/test_image/4.png b/yemian_seg_diff-area_60/test_image/4.png new file mode 100644 index 0000000..c56b9f9 Binary files /dev/null and b/yemian_seg_diff-area_60/test_image/4.png differ diff --git a/yemian_seg_diff_60/main.py b/yemian_seg_diff_60/main.py deleted file mode 100644 index 299356e..0000000 --- a/yemian_seg_diff_60/main.py +++ /dev/null @@ -1,291 +0,0 @@ -import os -import cv2 -import numpy as np -from rknnlite.api import RKNNLite - -# --------------------------- -# 配置 -# --------------------------- -ROIS = [ - (604,182,594,252), -] - -IMG_SIZE = 640 -STRIDES = [8, 16, 32] -OBJ_THRESH = 0.25 -MASK_THRESH = 0.5 - -_global_rknn = None - -# --------------------------- -# RKNN 全局加载 -# --------------------------- -def init_rknn_model(model_path): - global _global_rknn - if _global_rknn is not None: - return _global_rknn - - rknn = RKNNLite(verbose=False) - ret = rknn.load_rknn(model_path) - if ret != 0: - raise RuntimeError(f"Load RKNN failed: {ret}") - ret = rknn.init_runtime() - if ret != 0: - raise RuntimeError(f"Init runtime failed: {ret}") - - _global_rknn = rknn - print(f"[INFO] RKNN Seg 模型加载成功: {model_path}") - return rknn - -# --------------------------- -# 工具函数 -# --------------------------- -def sigmoid(x): - return 1 / (1 + np.exp(-x)) - -def dfl_decode(dfl): - bins = np.arange(16) - dfl = sigmoid(dfl) - dfl /= np.sum(dfl, axis=1, keepdims=True) - return np.sum(dfl * bins, axis=1) - -def largest_intersect_cc(mask_bin, bbox): - x1, y1, x2, y2 = bbox - contours, _ = cv2.findContours(mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - if not contours: - return np.zeros_like(mask_bin, dtype=np.uint8) - - max_inter = 0 - best = np.zeros_like(mask_bin, dtype=np.uint8) - for cnt in contours: - tmp = np.zeros_like(mask_bin, dtype=np.uint8) - cv2.drawContours(tmp, [cnt], -1, 1, -1) - cx, cy, cw, ch = cv2.boundingRect(cnt) - ix1 = max(cx, x1) - iy1 = max(cy, y1) - ix2 = min(cx + cw, x2) - iy2 = min(cy + ch, y2) - area = max(0, ix2 - ix1) * max(0, iy2 - iy1) - if area > max_inter: - max_inter = area - best = tmp - return best - -# --------------------------- -# RANSAC 直线拟合(核心新增) -# --------------------------- -def fit_line_ransac(pts, max_dist=2.5, min_inliers_ratio=0.6, iters=100): - """ - 拟合 x = m*y + b - pts: Nx2 -> [x,y] - """ - if len(pts) < 10: - return None - - xs = pts[:, 0] - ys = pts[:, 1] - - best_m, best_b = None, None - best_inliers = 0 - - for _ in range(iters): - idx = np.random.choice(len(pts), 2, replace=False) - y1, y2 = ys[idx] - x1, x2 = xs[idx] - if abs(y2 - y1) < 1e-3: - continue - - m = (x2 - x1) / (y2 - y1) - b = x1 - m * y1 - - x_pred = m * ys + b - dist = np.abs(xs - x_pred) - inliers = dist < max_dist - cnt = np.sum(inliers) - - if cnt > best_inliers: - best_inliers = cnt - best_m, best_b = m, b - - if best_m is None: - return None - - if best_inliers / len(pts) < min_inliers_ratio: - return None - - return best_m, best_b - -# --------------------------- -# Seg 推理 -# --------------------------- -def seg_infer(roi): - rknn = _global_rknn - h0, w0 = roi.shape[:2] - - inp_img = cv2.resize(roi, (IMG_SIZE, IMG_SIZE)) - inp = inp_img[..., ::-1][None, ...] # BGR -> RGB - outputs = rknn.inference([inp]) - - proto = outputs[12][0] - proto_h, proto_w = proto.shape[1:] - - best_score = -1 - best_coef = None - best_bbox = None - - out_i = 0 - for stride in STRIDES: - reg = outputs[out_i][0] - cls = outputs[out_i + 1][0, 0] - obj = outputs[out_i + 2][0, 0] - coef = outputs[out_i + 3][0] - out_i += 4 - - score_map = sigmoid(cls) * sigmoid(obj) - y, x = np.unravel_index(np.argmax(score_map), score_map.shape) - score = score_map[y, x] - - if score > best_score and score > OBJ_THRESH: - best_score = score - best_coef = coef[:, y, x] - - dfl = reg[:, y, x].reshape(4, 16) - l, t, r, b = dfl_decode(dfl) - - cx = (x + 0.5) * stride - cy = (y + 0.5) * stride - - scale = proto_w / IMG_SIZE - x1 = int((cx - l) * scale) - y1 = int((cy - t) * scale) - x2 = int((cx + r) * scale) - y2 = int((cy + b) * scale) - - best_bbox = ( - max(0, x1), max(0, y1), - min(proto_w, x2), min(proto_h, y2) - ) - - if best_coef is None: - return np.zeros((h0, w0), dtype=np.uint8) - - proto_mask = sigmoid(np.tensordot(best_coef, proto, axes=1)) > MASK_THRESH - proto_mask = proto_mask.astype(np.uint8) - - mask_final = largest_intersect_cc(proto_mask, best_bbox) - mask_roi = cv2.resize(mask_final, (w0, h0), interpolation=cv2.INTER_NEAREST) * 255 - return mask_roi.astype(np.uint8) - -# --------------------------- -# PC 后处理 -# --------------------------- -def extract_left_right_edge_points(mask_bin): - h, w = mask_bin.shape - left_pts, right_pts = [], [] - for y in range(h): - xs = np.where(mask_bin[y] > 0)[0] - if len(xs) >= 2: - left_pts.append([xs.min(), y]) - right_pts.append([xs.max(), y]) - return np.array(left_pts), np.array(right_pts) - -def filter_by_seg_y_ratio(pts, y_start=0.35, y_end=0.85): - if len(pts) < 2: - return pts - y_min, y_max = pts[:, 1].min(), pts[:, 1].max() - h = y_max - y_min - if h < 10: - return pts - y0 = y_min + int(h * y_start) - y1 = y_min + int(h * y_end) - return pts[(pts[:, 1] >= y0) & (pts[:, 1] <= y1)] - -def get_y_ref(mask_bin): - h, w = mask_bin.shape - ys = [] - for x in range(int(w * 0.2), int(w * 0.8)): - y = np.where(mask_bin[:, x] > 0)[0] - if len(y): - ys.append(y.max()) - return int(np.mean(ys)) if ys else h // 2 - -# --------------------------- -# 单张图计算函数 -# --------------------------- -def caculate_yemian_diff(img, return_vis=True): - if _global_rknn is None: - raise RuntimeError("请先 init_rknn_model()") - - vis = img.copy() if return_vis else None - result_data = None - - for rx, ry, rw, rh in ROIS: - roi = img[ry:ry + rh, rx:rx + rw] - mask_bin = seg_infer(roi) // 255 - - if return_vis: - green = np.zeros_like(roi) - green[mask_bin == 1] = (0, 255, 0) - vis[ry:ry + rh, rx:rx + rw] = cv2.addWeighted( - roi, 0.7, green, 0.3, 0 - ) - - left_pts, right_pts = extract_left_right_edge_points(mask_bin) - left_pts = filter_by_seg_y_ratio(left_pts) - right_pts = filter_by_seg_y_ratio(right_pts) - - left_line = fit_line_ransac(left_pts) - right_line = fit_line_ransac(right_pts) - if left_line is None or right_line is None: - continue - - m1, b1 = left_line - m2, b2 = right_line - - y_ref = get_y_ref(mask_bin) - x_left = int(m1 * y_ref + b1) - x_right = int(m2 * y_ref + b2) - - X_L, X_R, Y = rx + x_left, rx + x_right, ry + y_ref - diff = X_R - X_L - result_data = (X_L, Y, X_R, Y, diff) - - if return_vis: - roi_vis = vis[ry:ry + rh, rx:rx + rw] - cv2.line(roi_vis, (int(b1), 0), (int(m1 * rh + b1), rh), (0, 0, 255), 3) - cv2.line(roi_vis, (int(b2), 0), (int(m2 * rh + b2), rh), (255, 0, 0), 3) - cv2.line(roi_vis, (0, y_ref), (rw, y_ref), (0, 255, 255), 2) - cv2.circle(roi_vis, (x_left, y_ref), 6, (0, 0, 255), -1) - cv2.circle(roi_vis, (x_right, y_ref), 6, (255, 0, 0), -1) - cv2.putText( - roi_vis, f"diff={diff}px", - (10, 40), - cv2.FONT_HERSHEY_SIMPLEX, - 1, (0, 255, 255), 2 - ) - - return result_data, vis - -# --------------------------- -# main -# --------------------------- -if __name__ == "__main__": - RKNN_MODEL_PATH = "seg700.rknn" - IMAGE_PATH = "7.png" - - init_rknn_model(RKNN_MODEL_PATH) - - img = cv2.imread(IMAGE_PATH) - if img is None: - raise FileNotFoundError(IMAGE_PATH) - - result_data, vis_img = caculate_yemian_diff(img, return_vis=True) - - if result_data: - XL, YL, XR, YR, diff = result_data - print(f"左交点: ({XL},{YL}) 右交点: ({XR},{YR}) diff={diff}px") - - if vis_img is not None: - cv2.imwrite("vis_output.png", vis_img) - print("可视化结果保存到 vis_output.png") -