This commit is contained in:
琉璃月光
2025-10-21 14:11:52 +08:00
parent 349449f2b7
commit df7c0730f5
363 changed files with 5386 additions and 578 deletions

Binary file not shown.

BIN
yemian/best.pt Normal file

Binary file not shown.

BIN
yemian/resize/best.pt Normal file

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 184 KiB

View File

@ -0,0 +1 @@
0 0.317016 0.412804 0.274836 0.624093 0.466866 0.652477 0.748806 0.636707 0.729938 0.409651 0.482407 0.422264 0.436896 0.425418

Binary file not shown.

After

Width:  |  Height:  |  Size: 183 KiB

View File

@ -0,0 +1 @@
0 0.314796 0.422264 0.268177 0.620940 0.462427 0.652477 0.756576 0.627247 0.732158 0.409651 0.480187 0.425418 0.432457 0.431725

Binary file not shown.

After

Width:  |  Height:  |  Size: 185 KiB

View File

@ -0,0 +1 @@
0 0.315906 0.409651 0.274836 0.617786 0.466866 0.643014 0.751026 0.630400 0.733267 0.406497 0.490177 0.422264 0.434676 0.419111

View File

@ -0,0 +1,52 @@
import os
import cv2
# ----------------------------
# Configuration
# ----------------------------
SOURCE_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/seg/resize_seg2"  # source image directory
TARGET_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/seg/resize_seg3"  # output directory
TARGET_SIZE = 640  # resize target (square side, model input size)
SUBSETS = ["train", "val", "test"]
# ----------------------------
# Global ROI (x, y, w, h)
# ----------------------------
GLOBAL_ROI = [562, 798, 1287, 453]
# ----------------------------
# 主处理函数
# ----------------------------
def process_images():
    """Crop every train/val/test image to GLOBAL_ROI and resize it to TARGET_SIZE.

    Reads images from ``SOURCE_DIR/<subset>``, clips the global ROI to the
    image bounds, resizes the crop to a TARGET_SIZE×TARGET_SIZE square and
    writes it under ``TARGET_DIR/<subset>`` with the same file name.
    """
    roi_x, roi_y, roi_w, roi_h = GLOBAL_ROI
    for subset in SUBSETS:
        src_dir = os.path.join(SOURCE_DIR, subset)
        dst_dir = os.path.join(TARGET_DIR, subset)
        os.makedirs(dst_dir, exist_ok=True)
        for name in os.listdir(src_dir):
            if not name.endswith((".jpg", ".png")):
                continue
            src_path = os.path.join(src_dir, name)
            image = cv2.imread(src_path)
            if image is None:
                print(f"❌ 无法读取图片: {src_path}")
                continue
            img_h, img_w = image.shape[:2]
            # Clip the ROI so it never reaches outside the image.
            left, top = max(0, roi_x), max(0, roi_y)
            right = min(img_w, roi_x + roi_w)
            bottom = min(img_h, roi_y + roi_h)
            patch = image[top:bottom, left:right]
            if patch.size == 0:
                print(f"❌ 裁剪结果为空: {name}")
                continue
            # Note: aspect ratio is intentionally not preserved here.
            squared = cv2.resize(patch, (TARGET_SIZE, TARGET_SIZE))
            cv2.imwrite(os.path.join(dst_dir, name), squared)
            print(f"✅ 图片处理完成: {subset}/{name}")


if __name__ == "__main__":
    process_images()

View File

@ -0,0 +1,93 @@
import os
# ----------------------------
# Configuration
# ----------------------------
SOURCE_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/seg/resize_seg2"  # source image directory
TARGET_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/seg/resize_seg3"  # output directory
SUBSETS = ["train", "val", "test"]
# Global ROI (x, y, w, h) — must match the crop used for the images
GLOBAL_ROI = [562, 798, 1287, 453]
# ----------------------------
# 分割标签处理函数
# ----------------------------
def adjust_seg_labels(label_path, crop_coords, orig_size):
    """Remap normalized YOLO-seg polygon labels from the full image into a crop.

    :param label_path: source label file, one polygon per line in the form
        ``cls x1 y1 x2 y2 ...`` with coordinates normalized to the original
        image.
    :param crop_coords: crop rectangle ``(x1, y1, x2, y2)`` in pixels.
    :param orig_size: original image size ``(width, height)`` in pixels.
    :return: list of label lines re-normalized to the crop; empty when the
        label file does not exist.
    """
    left, top, right, bottom = crop_coords
    crop_w = right - left
    crop_h = bottom - top
    remapped = []
    if not os.path.exists(label_path):
        print(f"⚠️ 标签源文件不存在: {label_path}")
        return remapped
    orig_w, orig_h = orig_size
    with open(label_path, 'r') as f:
        for raw in f:
            parts = raw.strip().split()
            # Need a class id plus at least one (x, y) pair; an even token
            # count means a dangling coordinate, so the line is skipped.
            if len(parts) < 3 or len(parts) % 2 == 0:
                continue
            cls = parts[0]
            values = [float(v) for v in parts[1:]]
            pieces = []
            for xn, yn in zip(values[0::2], values[1::2]):
                # Denormalize to pixels, shift into the crop frame, then
                # renormalize against the crop size.
                px = xn * orig_w - left
                py = yn * orig_h - top
                pieces.append(f"{px / crop_w:.6f}")
                pieces.append(f"{py / crop_h:.6f}")
            remapped.append(" ".join([cls] + pieces))
    return remapped
# ----------------------------
# 主处理函数
# ----------------------------
def process_seg_labels():
    """Remap segmentation labels from the original frames into GLOBAL_ROI.

    For every ``.txt`` label under ``SOURCE_DIR/<subset>`` the paired
    ``.jpg`` is read only to obtain the original image size; the polygon
    coordinates are then re-normalized to the fixed ROI crop via
    ``adjust_seg_labels`` and written to ``TARGET_DIR/<subset>``.
    """
    # Fix: this import used to sit inside the innermost per-file loop,
    # re-running the import machinery for every label file. Hoisted here
    # (function scope keeps the module importable without OpenCV).
    import cv2

    x, y, w, h = GLOBAL_ROI
    fixed_roi = (x, y, x + w, y + h)
    for subset in SUBSETS:
        src_dir = os.path.join(SOURCE_DIR, subset)
        tgt_dir = os.path.join(TARGET_DIR, subset)
        os.makedirs(tgt_dir, exist_ok=True)
        for file in os.listdir(src_dir):
            if not file.endswith(".txt"):
                continue
            label_path = os.path.join(src_dir, file)
            # Read the paired image only to recover its size.
            img_name = os.path.splitext(file)[0] + ".jpg"
            img_path = os.path.join(src_dir, img_name)
            if not os.path.exists(img_path):
                print(f"❌ 无法读取图片以获取尺寸: {img_path}")
                continue
            img = cv2.imread(img_path)
            if img is None:
                print(f"❌ 无法读取图片: {img_path}")
                continue
            h_img, w_img = img.shape[:2]
            new_labels = adjust_seg_labels(label_path, fixed_roi, (w_img, h_img))
            tgt_label_path = os.path.join(tgt_dir, file)
            with open(tgt_label_path, 'w') as f:
                f.write("\n".join(new_labels))
            print(f"✅ 标签处理完成: {subset}/{file}, 条数 {len(new_labels)}")


if __name__ == "__main__":
    process_seg_labels()

View File

@ -0,0 +1 @@
562, 798, 1287, 453

View File

@ -6,12 +6,12 @@ import numpy as np
from ultralytics import YOLO
from pathlib import Path
# ====================== 配置参数 ======================
MODEL_PATH = "/home/hx/yolo/ultralytics_yolo11-main/runs/train/seg_r/exp2/weights/best.pt"
# ====================== 配置参数 ======================3
MODEL_PATH = "best.pt"
#SOURCE_IMG_DIR = "/home/hx/yolo/output_masks" # 原始输入图像目录
SOURCE_IMG_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/f6" # 原始输入图像目录
SOURCE_IMG_DIR = "/home/hx/yolo/yemian/test_image" # 原始输入图像目录
OUTPUT_DIR = "/home/hx/yolo/output_masks2" # 推理输出根目录
ROI_COORDS_FILE = "./roi_coordinates/1_rois.txt" # 必须与训练时相同
ROI_COORDS_FILE = "./roi_coordinates/1_rois2.txt" # 必须与训练时相同
CONF_THRESHOLD = 0.25
IOU_THRESHOLD = 0.45
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

Binary file not shown.

After

Width:  |  Height:  |  Size: 504 KiB

View File

@ -0,0 +1 @@
0 0.378906 0.684028 0.357701 0.750496 0.454241 0.759425 0.595982 0.754464 0.586496 0.683036 0.462054 0.687004 0.439174 0.687996

Binary file not shown.

After

Width:  |  Height:  |  Size: 501 KiB

View File

@ -0,0 +1 @@
0 0.377790 0.687004 0.354353 0.749504 0.452009 0.759425 0.599888 0.751488 0.587612 0.683036 0.460938 0.687996 0.436942 0.689980

Binary file not shown.

After

Width:  |  Height:  |  Size: 508 KiB

View File

@ -0,0 +1 @@
0 0.378348 0.683036 0.357701 0.748512 0.454241 0.756448 0.597098 0.752480 0.588170 0.682044 0.465960 0.687004 0.438058 0.686012

81
yemian/resize_labels.py Normal file
View File

@ -0,0 +1,81 @@
#!/usr/bin/env python3
# Map 640x640 ROI-normalized YOLO-seg labels back to full-image normalized
# coordinates, optionally writing debug overlays for visual inspection.
import os, cv2, numpy as np

# ========== Configuration ==========
roi_label_dir = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/seg/label"  # 640x640 ROI labels
original_image_dir = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/class1"
roi_coords_file = "/home/hx/yolo/zhuangtai_class_cls/roi_coordinates/1_rois.txt"  # each line: x,y,w,h
output_label_dir = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/seg/classdata3_orig_norm_labels"
debug_vis = True  # also write overlay images next to the labels
debug_vis_dir = output_label_dir + "_vis"
# ===================================

os.makedirs(output_label_dir, exist_ok=True)
if debug_vis:
    os.makedirs(debug_vis_dir, exist_ok=True)

# ----------- Load ROI (only the first line of the file is used) -----------
with open(roi_coords_file, 'r') as f:
    roi_x, roi_y, roi_w, roi_h = map(int, f.readline().strip().split(','))

# ----------- Process labels -----------
for label_file in os.listdir(roi_label_dir):
    if not label_file.lower().endswith('.txt'):
        continue
    base_name = os.path.splitext(label_file)[0]
    # Find the matching original image (several extensions are tried).
    orig_img_path = None
    for ext in ('.jpg','.jpeg','.png','.bmp'):
        tmp_path = os.path.join(original_image_dir, base_name + ext)
        if os.path.exists(tmp_path):
            orig_img_path = tmp_path
            break
    if orig_img_path is None:
        print(f"[SKIP] 未找到原图: {base_name}")
        continue
    img = cv2.imread(orig_img_path)
    if img is None:
        print(f"[SKIP] 无法读取原图: {orig_img_path}")
        continue
    h_img, w_img = img.shape[:2]
    # Read the ROI-normalized label lines (blank lines dropped).
    in_path = os.path.join(roi_label_dir, label_file)
    lines = [ln.strip() for ln in open(in_path, 'r') if ln.strip()]
    out_lines = []
    for ln in lines:
        parts = ln.split()
        cls = parts[0]
        coords = list(map(float, parts[1:]))
        mapped = []
        for i in range(0, len(coords), 2):
            x_n, y_n = coords[i], coords[i+1]  # normalized to the ROI, 0~1
            # ROI-normalized -> absolute pixels in the original image...
            x_abs = roi_x + x_n * roi_w
            y_abs = roi_y + y_n * roi_h
            # ...clamped inside the image...
            x_abs = min(max(x_abs, 0.0), w_img-1)
            y_abs = min(max(y_abs, 0.0), h_img-1)
            # ...then re-normalized against the full image size.
            mapped.append((x_abs / w_img, y_abs / h_img))
        out_line = cls + " " + " ".join(f"{x:.6f} {y:.6f}" for x, y in mapped)
        out_lines.append(out_line)
    out_path = os.path.join(output_label_dir, label_file)
    with open(out_path, 'w') as fw:
        fw.write("\n".join(out_lines))
    print(f"[OK] {out_path}")
    # Optional visualization: ROI rectangle (orange) + polygons (red).
    if debug_vis and out_lines:
        vis = img.copy()
        cv2.rectangle(vis, (roi_x, roi_y), (roi_x+roi_w-1, roi_y+roi_h-1), (0,165,255), 2)
        for ol in out_lines:
            pts = np.array([[int(float(parts[i])*w_img), int(float(parts[i+1])*h_img)]
                            for parts in [ol.split()] for i in range(1, len(parts), 2)], np.int32)
            if len(pts) >= 3:
                cv2.polylines(vis, [pts], True, (0,0,255), 2)
        vis_name = os.path.join(debug_vis_dir, os.path.basename(orig_img_path))
        cv2.imwrite(vis_name, vis)

print("🎉 ROI 标签已还原到原图归一化完成!")

View File

@ -0,0 +1 @@
859,810,696,328

View File

@ -0,0 +1 @@
589,789,210,319

BIN
yemian/segr.pt Normal file

Binary file not shown.

View File

@ -1,14 +1,13 @@
import os
#!/usr/bin/env python3
import cv2
import torch
import argparse
import numpy as np
from ultralytics import YOLO
from pathlib import Path
from ultralytics import YOLO
import torch
# ====================== 配置参数 ======================
MODEL_PATH = "/home/hx/yolo/ultralytics_yolo11-main/runs/train/seg/exp7/weights/best.pt"
SOURCE_IMG_DIR = "/home/hx/yolo/output_masks"
# ====================== 配置 ======================
MODEL_PATH = "best.pt"
SOURCE_IMG_DIR = "/home/hx/yolo/yemian/test_image"
OUTPUT_DIR = "/home/hx/yolo/output_masks2"
CONF_THRESHOLD = 0.25
IOU_THRESHOLD = 0.45
@ -17,207 +16,121 @@ SAVE_TXT = True
SAVE_MASKS = True
VIEW_IMG = False
LINE_WIDTH = 2
IMG_SIZE = 640 # YOLO 输入尺寸
def plot_result_with_opacity(result, line_width=2, mask_opacity=0.5):
"""
手动绘制 YOLO 分割结果,支持掩码透明度叠加,并修复掩码尺寸不匹配问题
"""
img = result.orig_img.copy() # HWC, BGR
# ====================== Letterbox 缩放函数 ======================
def letterbox_image(img, new_size=IMG_SIZE):
    """Fit *img* into a new_size×new_size square, padding with gray (114).

    Returns the padded canvas plus the scale factor, left/top padding and
    the resized width/height so results can be mapped back to the original.
    """
    height, width = img.shape[:2]
    ratio = min(new_size / width, new_size / height)
    fit_w, fit_h = int(width * ratio), int(height * ratio)
    fitted = cv2.resize(img, (fit_w, fit_h), interpolation=cv2.INTER_LINEAR)
    board = np.full((new_size, new_size, 3), 114, dtype=np.uint8)
    # Center the resized image; remaining border is the letterbox padding.
    off_x = (new_size - fit_w) // 2
    off_y = (new_size - fit_h) // 2
    board[off_y:off_y + fit_h, off_x:off_x + fit_w] = fitted
    return board, ratio, off_x, off_y, fit_w, fit_h
# 获取原始图像尺寸
orig_shape = img.shape[:2] # (height, width)
# ====================== 绘制 mask & 边框 ======================
def plot_mask_on_image(result, orig_shape, scale, pad_left, pad_top, new_w, new_h, alpha=0.5):
H_ori, W_ori = orig_shape[:2]
img = np.zeros((H_ori, W_ori, 3), dtype=np.uint8)
if result.masks is not None and len(result.boxes) > 0:
# 将掩码从 GPU 移到 CPU 并转为 numpy
masks = result.masks.data.cpu().numpy() # (N, H_mask, W_mask)
# resize 掩码到原始图像尺寸
resized_masks = []
for mask in masks:
mask_resized = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
mask_resized = (mask_resized > 0.5).astype(np.uint8) # 二值化
resized_masks.append(mask_resized)
resized_masks = np.array(resized_masks)
# 随机颜色 (BGR)
num_masks = len(result.boxes)
colors = np.random.randint(0, 255, size=(num_masks, 3), dtype=np.uint8)
# 创建叠加层
masks = result.masks.data.cpu().numpy() # (N, IMG_SIZE, IMG_SIZE)
overlay = img.copy()
for i in range(num_masks):
color = colors[i].tolist()
mask_resized = resized_masks[i]
overlay[mask_resized == 1] = color
num_masks = len(masks)
colors = np.random.randint(0,255,(num_masks,3),dtype=np.uint8)
# 透明叠加
cv2.addWeighted(overlay, mask_opacity, img, 1 - mask_opacity, 0, img)
for i, mask in enumerate(masks):
# 去掉 padding
mask_crop = mask[pad_top:pad_top+new_h, pad_left:pad_left+new_w]
# resize 回原图
mask_orig = cv2.resize(mask_crop, (W_ori, H_ori), interpolation=cv2.INTER_NEAREST)
overlay[mask_orig>0.5] = colors[i].tolist()
# 绘制边界框和标签(保持不变)
if result.boxes is not None:
boxes = result.boxes.xyxy.cpu().numpy()
classes = result.boxes.cls.cpu().numpy().astype(int)
confidences = result.boxes.conf.cpu().numpy()
colors = np.random.randint(0, 255, size=(len(classes), 3), dtype=np.uint8)
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 0.6
thickness = 1
for i in range(len(boxes)):
box = boxes[i].astype(int)
cls_id = classes[i]
conf = confidences[i]
color = colors[i].tolist()
# 绘制矩形框
cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), color, line_width)
# 标签文本
label = f"{cls_id} {conf:.2f}"
# 获取文本大小
(text_w, text_h), baseline = cv2.getTextSize(label, font, font_scale, thickness)
text_h += baseline
# 绘制标签背景
cv2.rectangle(img, (box[0], box[1] - text_h - 6), (box[0] + text_w, box[1]), color, -1)
# 绘制文本
cv2.putText(img, label, (box[0], box[1] - 4), font, font_scale,
(255, 255, 255), thickness, cv2.LINE_AA)
cv2.addWeighted(overlay, alpha, img, 1-alpha, 0, img)
return img
# ====================== 主推理 ======================
def run_segmentation():
print(f"🚀 加载模型: {MODEL_PATH}")
model = YOLO(MODEL_PATH)
model.to(DEVICE)
def run_segmentation_inference(
model_path,
source,
output_dir,
conf_threshold=0.25,
iou_threshold=0.45,
device="cuda:0",
save_txt=True,
save_masks=True,
view_img=False,
line_width=2,
):
print(f"🚀 加载模型: {model_path}")
print(f"💻 使用设备: {device}")
# 加载模型
model = YOLO(model_path)
# 创建输出目录
output_dir = Path(output_dir)
source = Path(SOURCE_IMG_DIR)
output_dir = Path(OUTPUT_DIR)
output_dir.mkdir(parents=True, exist_ok=True)
txt_dir = output_dir / "labels"
mask_dir = output_dir / "masks"
if save_txt:
txt_dir.mkdir(exist_ok=True)
if save_masks:
mask_dir.mkdir(exist_ok=True)
# 获取图像文件列表
source = Path(source)
if source.is_file():
img_files = [source]
else:
img_files = list(source.glob("*.jpg")) + \
list(source.glob("*.jpeg")) + \
list(source.glob("*.png")) + \
list(source.glob("*.bmp"))
if SAVE_TXT: txt_dir.mkdir(exist_ok=True)
if SAVE_MASKS: mask_dir.mkdir(exist_ok=True)
img_files = list(source.glob("*.jpg")) + list(source.glob("*.png"))
if not img_files:
print(f"{source}未找到图像文件")
print(f"❌ 未找到图")
return
print(f"🖼️ 待推理图片数量: {len(img_files)}")
print(f"🖼️ 共 {len(img_files)} 张图片待推理...")
# 推理循环
for img_path in img_files:
print(f"🔍 推理: {img_path.name}")
orig_img = cv2.imread(str(img_path))
if orig_img is None:
print(" ❌ 读取失败")
continue
H_ori, W_ori = orig_img.shape[:2]
# 执行推理
results = model(
source=str(img_path),
conf=conf_threshold,
iou=iou_threshold,
imgsz=640,
device=device,
verbose=True
)
# Letterbox 缩放
img_input, scale, pad_left, pad_top, new_w, new_h = letterbox_image(orig_img, IMG_SIZE)
# YOLO 推理
results = model(img_input, conf=CONF_THRESHOLD, iou=IOU_THRESHOLD, imgsz=IMG_SIZE, device=DEVICE)
result = results[0]
orig_img = result.orig_img # 原始图像
# ✅ 使用自定义绘制函数(支持透明度)
plotted = plot_result_with_opacity(result, line_width=line_width, mask_opacity=0.5)
# 可视化 mask
plotted = plot_mask_on_image(result, orig_img.shape, scale, pad_left, pad_top, new_w, new_h, alpha=0.5)
# 保存可视化图像
# 保存结果
save_path = output_dir / f"seg_{img_path.name}"
cv2.imwrite(str(save_path), plotted)
print(f"✅ 保存结果: {save_path}")
# 保存 YOLO 格式标签(多边形)
if save_txt and result.masks is not None:
txt_path = txt_dir / (img_path.stem + ".txt")
with open(txt_path, 'w') as f:
# 保存标签
if SAVE_TXT and result.masks is not None:
txt_path = txt_dir / f"{img_path.stem}.txt"
with open(txt_path,"w") as f:
for i in range(len(result.boxes)):
cls_id = int(result.boxes.cls[i])
seg = result.masks.xy[i] # 多边形点 (N, 2)
seg = seg.flatten()
seg = seg / [orig_img.shape[1], orig_img.shape[0]] # 归一化
seg = seg.tolist()
line = f"{cls_id} {' '.join(f'{x:.6f}' for x in seg)}\n"
seg = result.masks.xy[i].copy()
# 去掉 padding + scale 回原图
seg[:,0] = (seg[:,0] - pad_left) * (W_ori / new_w)
seg[:,1] = (seg[:,1] - pad_top) * (H_ori / new_h)
seg_norm = seg / [W_ori, H_ori]
seg_flat = seg_norm.flatten().tolist()
line = f"{cls_id} " + " ".join(f"{x:.6f}" for x in seg_flat) + "\n"
f.write(line)
print(f"📝 保存标签: {txt_path}")
# 保存合并的掩码图
if save_masks and result.masks is not None:
mask = result.masks.data.cpu().numpy()
combined_mask = (mask.sum(axis=0) > 0).astype(np.uint8) * 255 # 合并所有掩码
# 保存 mask
if SAVE_MASKS and result.masks is not None:
masks = result.masks.data.cpu().numpy()
combined_mask = np.zeros((H_ori, W_ori), dtype=np.uint8)
for mask in masks:
mask_crop = mask[pad_top:pad_top+new_h, pad_left:pad_left+new_w]
mask_orig = cv2.resize(mask_crop, (W_ori, H_ori), interpolation=cv2.INTER_NEAREST)
combined_mask = np.maximum(combined_mask, (mask_orig>0.5).astype(np.uint8)*255)
mask_save_path = mask_dir / f"mask_{img_path.stem}.png"
cv2.imwrite(str(mask_save_path), combined_mask)
print(f"🎨 保存掩码: {mask_save_path}")
# 实时显示(可选)
if view_img:
cv2.imshow("Segmentation Result", plotted)
if cv2.waitKey(0) == 27: # ESC 退出
# 显示
if VIEW_IMG:
cv2.imshow("Segmentation", plotted)
if cv2.waitKey(0)==27:
cv2.destroyAllWindows()
break
if view_img:
cv2.destroyAllWindows()
print(f"\n🎉 推理完成!结果保存在: {output_dir}")
print(f"🎉 推理完成!结果保存到: {output_dir}")
# ====================== 主程序 ======================
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--model", default=MODEL_PATH, help="模型权重路径")
parser.add_argument("--source", default=SOURCE_IMG_DIR, help="图片路径或文件夹")
parser.add_argument("--output", default=OUTPUT_DIR, help="输出目录")
parser.add_argument("--conf", type=float, default=CONF_THRESHOLD, help="置信度阈值")
parser.add_argument("--iou", type=float, default=IOU_THRESHOLD, help="IoU 阈值")
parser.add_argument("--device", default=DEVICE, help="设备: cuda:0, cpu")
parser.add_argument("--view-img", action="store_true", help="显示图像")
parser.add_argument("--save-txt", action="store_true", help="保存标签")
parser.add_argument("--save-masks", action="store_true", help="保存掩码")
opt = parser.parse_args()
run_segmentation_inference(
model_path=opt.model,
source=opt.source,
output_dir=opt.output,
conf_threshold=opt.conf,
iou_threshold=opt.iou,
device=opt.device,
save_txt=opt.save_txt,
save_masks=opt.save_masks,
view_img=opt.view_img,
line_width=LINE_WIDTH,
)
if __name__=="__main__":
run_segmentation()

Binary file not shown.

After

Width:  |  Height:  |  Size: 465 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 454 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 508 KiB

View File

@ -76,8 +76,8 @@ def draw_mask_on_image(img, labels, alpha=0.5):
def main():
# ====================== 配置区 ======================
IMG_PATH = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/seg/resize/frame_20250805_121717_22320.jpg" # 修改为你的某张测试图
TXT_PATH = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/seg/resize/frame_20250805_121717_22320.txt" # 对应的 .txt 文件
IMG_PATH = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/seg/resize_seg3/train/frame_20250805_180639_775485.jpg" # 修改为你的某张测试图
TXT_PATH = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/seg/resize_seg3/train/frame_20250805_180639_775485.txt" # 对应的 .txt 文件
OUTPUT_PATH = "/home/hx/yolo/output_masks2/debug_overlay.png"
# ====================================================

57
yemian/val_labels_film.py Normal file
View File

@ -0,0 +1,57 @@
import cv2
import numpy as np
from pathlib import Path
from val_labels import load_yolo_polygon_labels,draw_mask_on_image
# 之前的load_yolo_polygon_labels和draw_mask_on_image函数保持不变
def visualize_labels_in_folder(src_img_dir, src_label_dir, dst_dir, alpha=0.5):
    """Draw YOLO polygon labels over every image in a folder and save the overlays.

    :param src_img_dir: folder holding the source images (*.jpg / *.png)
    :param src_label_dir: folder holding the matching ``<stem>.txt`` labels
    :param dst_dir: folder that receives the visualized images
    :param alpha: opacity used when blending the masks
    """
    out_root = Path(dst_dir)
    out_root.mkdir(parents=True, exist_ok=True)
    # jpg first, then png — the order the globs return them in.
    frames = list(Path(src_img_dir).glob('*.jpg')) + list(Path(src_img_dir).glob('*.png'))
    for img_path in frames:
        txt_path = Path(src_label_dir) / f"{img_path.stem}.txt"
        if not txt_path.exists():
            print(f"⚠️ 跳过 {img_path.name},未找到对应的标签文件")
            continue
        frame = cv2.imread(str(img_path))
        if frame is None:
            print(f"❌ 无法读取图像: {img_path}")
            continue
        h, w = frame.shape[:2]
        print(f"🖼️ 正在处理图像: {img_path.name}, 大小: {w}x{h}")
        polys = load_yolo_polygon_labels(txt_path, frame.shape)
        # len() check on purpose: labels may be an array, where bare
        # truthiness would be ambiguous.
        if len(polys) == 0:
            print(f"🟡 未找到有效标签: {txt_path}")
            continue
        rendered = draw_mask_on_image(frame, polys, alpha)
        save_path = out_root / img_path.name
        cv2.imwrite(str(save_path), rendered)
        print(f"✅ 已保存可视化结果: {save_path}")
if __name__ == "__main__":
    # Entry point: visualize an entire labelled folder in one pass.
    SRC_IMG_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/seg/resize1"  # source image folder
    SRC_LABEL_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/seg/resize1"  # source label folder
    DST_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/seg/resize_seg5"  # destination folder
    visualize_labels_in_folder(SRC_IMG_DIR, SRC_LABEL_DIR, DST_DIR)

View File

@ -0,0 +1,227 @@
# ===================================================
# final_compare_corner.py
# 同时显示 Canny 物理边缘(红线)和 YOLO 预测左边缘(绿线)
# 基于角点拟合直线,剔除离群点
# ===================================================
import os
import cv2
import numpy as np
from pathlib import Path
from ultralytics import YOLO
# ============================
# 参数
# ============================
TARGET_SIZE = 640      # model input size (square)
MAX_CORNERS = 200      # max corners for cv2.goodFeaturesToTrack
QUALITY_LEVEL = 0.01   # corner quality threshold
MIN_DISTANCE = 5       # minimum pixel distance between corners
DIST_THRESH = 15       # max point-to-line distance when removing outliers
ROIS = [(859, 810, 696, 328)]  # global ROI; adjust as needed
OUTPUT_DIR = "./final_output"
# ============================
# Canny 边缘部分(保持原逻辑)
# ============================
def load_global_rois(txt_path):
    """Parse ROI rectangles from *txt_path*, one ``x,y,w,h`` line each.

    Blank lines are ignored; malformed lines are reported and skipped.
    Returns a (possibly empty) list of ``(x, y, w, h)`` int tuples.
    """
    parsed = []
    if not os.path.exists(txt_path):
        print(f"❌ ROI 文件不存在: {txt_path}")
        return parsed
    with open(txt_path, 'r') as handle:
        for raw in handle:
            text = raw.strip()
            if not text:
                continue
            try:
                rx, ry, rw, rh = map(int, text.split(','))
            except Exception as e:
                print(f"⚠️ 无法解析 ROI 行 '{text}': {e}")
            else:
                parsed.append((rx, ry, rw, rh))
    return parsed
def fit_line_best(points, distance_thresh=5, max_iter=5):
    """Robustly fit a 2-D line to *points* (N×2 array).

    First iteratively trims outliers using a PCA fit, then refines with
    RANSAC (``y = kx + b``) when scikit-learn is available, falling back
    to the plain PCA direction otherwise.

    Returns ``(vx, vy, x0, y0)`` — a direction vector and a point on the
    line — or ``None`` when fewer than 2 points survive.
    """
    if len(points) < 2:
        return None
    points = points.astype(np.float32)
    # PCA pass: drop points farther than distance_thresh from the principal
    # axis, repeating until stable (or max_iter reached).
    for _ in range(max_iter):
        mean = np.mean(points, axis=0)
        cov = np.cov(points.T)
        eigvals, eigvecs = np.linalg.eig(cov)
        idx = np.argmax(eigvals)
        direction = eigvecs[:, idx]
        vx, vy = direction
        x0, y0 = mean
        # Perpendicular distance of each point to the PCA axis.
        dists = np.abs(vy*(points[:,0]-x0) - vx*(points[:,1]-y0)) / np.hypot(vx, vy)
        inliers = points[dists <= distance_thresh]
        if len(inliers) == len(points) or len(inliers) < 2:
            break
        points = inliers
    if len(points) < 2:
        return None
    # RANSAC refinement of y = kx + b on the surviving points.
    X = points[:, 0].reshape(-1, 1)
    y = points[:, 1]
    try:
        from sklearn.linear_model import RANSACRegressor
        ransac = RANSACRegressor(residual_threshold=distance_thresh)
        ransac.fit(X, y)
        k = ransac.estimator_.coef_[0]
        b = ransac.estimator_.intercept_
        vx = 1 / np.sqrt(1 + k**2)
        vy = k / np.sqrt(1 + k**2)
        x0 = np.mean(points[:,0])
        y0 = k*x0 + b
    except Exception:
        # Fix: narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are no longer swallowed; still covers ImportError
        # (sklearn missing) and RANSAC failures via the PCA fallback.
        mean = np.mean(points, axis=0)
        cov = np.cov(points.T)
        eigvals, eigvecs = np.linalg.eig(cov)
        idx = np.argmax(eigvals)
        direction = eigvecs[:, idx]
        vx, vy = direction
        x0, y0 = mean
    return vx, vy, x0, y0
def extract_canny_overlay(image_path, roi_file, distance_thresh=3):
    """Draw the Canny-fitted physical edge (red line) for each ROI on the image.

    Reads *image_path*, and for every ROI listed in *roi_file* runs Canny,
    takes the longest contour as the physical edge and fits a line via
    :func:`fit_line_best`.

    Returns the annotated BGR image (unannotated copy when no ROI is valid),
    or ``None`` when the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ 无法读取图片: {image_path}")
        return None
    overlay_img = img.copy()
    rois = load_global_rois(roi_file)
    if not rois:
        print("❌ 没有有效 ROI")
        return overlay_img
    for idx, (x, y, w, h) in enumerate(rois):
        roi = img[y:y+h, x:x+w]
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 100, 200)
        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        # The longest open contour is assumed to be the physical edge.
        longest_contour = max(contours, key=lambda c: cv2.arcLength(c, closed=False), default=None)
        if longest_contour is not None and len(longest_contour) >= 2:
            points = longest_contour.reshape(-1, 2)
            line = fit_line_best(points, distance_thresh=distance_thresh, max_iter=5)
            if line is not None:
                vx, vy, x0, y0 = line
                # Fix: a (near-)vertical direction (vx == 0) previously
                # crashed on the vy/vx division below; skip drawing instead.
                if abs(vx) > 1e-9:
                    cols = w
                    # Extend the fitted line across the full ROI width and
                    # shift back into full-image coordinates.
                    lefty = int(y0 - vy/vx * x0)
                    righty = int(y0 + vy/vx * (cols - x0))
                    pt1 = (x, y + lefty)
                    pt2 = (x + cols - 1, y + righty)
                    cv2.line(overlay_img, pt1, pt2, (0, 0, 255), 2)  # red
                    print(f"✅ ROI {idx} Canny 边缘拟合完成")
    return overlay_img
# ============================
# YOLO 角点 + 拟合直线
# ============================
def select_edge_corners(corners, w, left_ratio=0.2, right_ratio=0.2, y_var_thresh=5):
    """Split detected corners into left-edge and right-edge candidate sets.

    Corners within ``left_ratio``/``right_ratio`` of the ROI width *w* become
    edge candidates, then each side is thinned so only y-sorted neighbours
    whose vertical gap exceeds ``y_var_thresh`` remain (de-duplicated,
    first-occurrence order).  Returns two int32 (N, 2) arrays.
    """
    empty = np.zeros((0, 2), dtype=np.int32)
    if corners is None:
        return empty, empty
    pts = np.int32(corners).reshape(-1, 2)
    lo_bound = int(w * left_ratio)
    hi_bound = w - int(w * right_ratio)

    def thin(group):
        # Fewer than two points: nothing to thin.
        if len(group) < 2:
            return group.astype(np.int32)
        by_y = group[np.argsort(group[:, 1])]
        gaps = np.abs(np.diff(by_y[:, 1]))
        jumps = np.where(gaps > y_var_thresh)[0]
        if len(jumps) == 0:
            return by_y.astype(np.int32)
        chosen = np.array([by_y[i] for i in jumps] + [by_y[i + 1] for i in jumps])
        # Drop duplicates while preserving first-occurrence order.
        _, first_idx = np.unique(chosen.reshape(-1, 2), axis=0, return_index=True)
        chosen = chosen[np.sort(first_idx)]
        return chosen.astype(np.int32)

    return thin(pts[pts[:, 0] <= lo_bound]), thin(pts[pts[:, 0] >= hi_bound])
def fit_line_with_outlier_removal(pts, dist_thresh=DIST_THRESH):
    """Fit ``x = m*y + b`` to *pts*, refitting once after outlier removal.

    Points whose horizontal distance from the first fit exceeds
    ``dist_thresh`` are discarded before the second fit.

    Returns ``((m, b), inliers)`` with inliers as an int32 (N, 2) array, or
    ``(None, ...)`` when a fit is impossible.
    """
    if pts is None or len(pts) < 2:
        return None, np.zeros((0, 2), dtype=np.int32)
    pts = np.array(pts, dtype=np.float64)
    x, y = pts[:, 0], pts[:, 1]
    try:
        # x as a function of y, because the target edges are near-vertical.
        m, b = np.polyfit(y, x, 1)
    except Exception:
        # Fix: narrowed from a bare ``except:``; polyfit can fail on
        # degenerate input, but Ctrl-C should not be swallowed.
        return None, pts.astype(np.int32)
    x_fit = m * y + b
    mask = np.abs(x - x_fit) < dist_thresh
    if np.sum(mask) < 2:
        # Too few inliers to refit — return the first fit as-is.
        return (m, b), pts.astype(np.int32)
    x2, y2 = x[mask], y[mask]
    m2, b2 = np.polyfit(y2, x2, 1)
    inliers = np.stack([x2, y2], axis=1).astype(np.int32)
    return (m2, b2), inliers
def get_yolo_left_edge_lines(image_path, model_path, rois=ROIS, imgsz=TARGET_SIZE):
    """For each ROI, segment with YOLO and fit a line to the mask's left edge.

    Returns line segments ``[(x1, y1, x2, y2), ...]`` in full-image pixel
    coordinates; an ROI contributes nothing when no mask is found or the
    line fit fails.
    """
    model = YOLO(model_path)
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ 无法读取图片: {image_path}")
        return []
    lines = []
    for (x, y, w, h) in rois:
        roi_img = img[y:y+h, x:x+w]
        # NOTE(review): plain resize (not letterbox) — presumably the model
        # was trained on equally distorted crops; confirm against training.
        resized = cv2.resize(roi_img, (imgsz, imgsz))
        results = model(resized, imgsz=imgsz, verbose=False)
        result = results[0]
        if result.masks is None or len(result.masks.data)==0:
            continue
        # Only the first detected mask is used per ROI.
        mask = result.masks.data[0].cpu().numpy()
        mask_bin = (mask>0.5).astype(np.uint8)
        mask_bin = cv2.resize(mask_bin, (w,h), interpolation=cv2.INTER_NEAREST)
        # Corner detection on the binary mask rendered as 8-bit grayscale.
        mask_gray = (mask_bin*255).astype(np.uint8)
        corners = cv2.goodFeaturesToTrack(mask_gray, maxCorners=MAX_CORNERS,
                                          qualityLevel=QUALITY_LEVEL, minDistance=MIN_DISTANCE)
        # Keep only left-edge corners; the right side is discarded here.
        left_pts, _ = select_edge_corners(corners, w)
        line_params, inliers = fit_line_with_outlier_removal(left_pts)
        if line_params is None:
            continue
        m,b = line_params
        # Evaluate x = m*y + b at the ROI's top and bottom rows, then shift
        # the segment back into full-image coordinates.
        y1, y2 = 0, h-1
        x1 = int(m*y1 + b)
        x2 = int(m*y2 + b)
        lines.append((x+x1, y+y1, x+x2, y+y2))
    return lines
# ============================
# 对比融合
# ============================
def compare_canny_vs_yolo(image_path, canny_roi_file, model_path, output_dir=OUTPUT_DIR):
    """Render Canny-fitted edges (red) and YOLO left edges (green) on one image."""
    out_root = Path(output_dir)
    out_root.mkdir(parents=True, exist_ok=True)
    base = extract_canny_overlay(image_path, canny_roi_file, distance_thresh=6)
    if base is None:
        return
    fused = base.copy()
    for x1, y1, x2, y2 in get_yolo_left_edge_lines(image_path, model_path):
        cv2.line(fused, (x1, y1), (x2, y2), (0, 255, 0), 2)  # green: YOLO edge
        cv2.circle(fused, (x1, y1), 4, (255, 0, 0), -1)      # blue: segment start
    save_to = out_root / f"compare_{Path(image_path).stem}.jpg"
    cv2.imwrite(str(save_to), fused)
    print(f"✅ 对比图已保存: {save_to}")
# ============================
# 使用示例
# ============================
if __name__ == "__main__":
    # Example: compare one test frame against the trained segmentation model.
    IMAGE_PATH = "../test_image/2.jpg"
    CANNY_ROI_FILE = "../roi_coordinates/1_rois1.txt"
    MODEL_PATH = "best.pt"
    compare_canny_vs_yolo(IMAGE_PATH, CANNY_ROI_FILE, MODEL_PATH)

BIN
yemian/yemian_line/best.pt Normal file

Binary file not shown.

View File

@ -0,0 +1,149 @@
import os
import cv2
import numpy as np
from pathlib import Path
from sklearn.linear_model import RANSACRegressor
# ---------------------------
# 读取 ROI 列表 txt
# ---------------------------
def load_rois_from_txt(txt_path):
    """Read one ``x,y,w,h`` ROI per line from *txt_path*.

    Blank lines are ignored; malformed lines are reported and skipped.
    Returns ``[]`` when the file is missing.
    """
    if not os.path.exists(txt_path):
        print(f"❌ ROI 文件不存在: {txt_path}")
        return []
    result = []
    with open(txt_path, 'r') as handle:
        for line in handle:
            s = line.strip()
            if not s:
                continue
            try:
                x, y, w, h = map(int, s.split(','))
            except Exception as e:
                print(f"⚠️ 无法解析 ROI 行 '{s}': {e}")
            else:
                result.append((x, y, w, h))
    return result
# ---------------------------
# PCA + RANSAC + 迭代去离群点拟合直线
# ---------------------------
def fit_line_best(points, distance_thresh=5, max_iter=5):
    """Robustly fit a 2-D line to *points* (N×2 array).

    Iterative PCA outlier trimming, then a RANSAC refinement of
    ``y = kx + b``; falls back to the plain PCA direction when RANSAC
    (or scikit-learn itself) is unavailable or fails.

    Returns ``(vx, vy, x0, y0)`` — direction and a point on the line —
    or ``None`` when fewer than 2 points survive.
    """
    if len(points) < 2:
        return None
    points = points.astype(np.float32)
    # PCA pass: repeatedly drop points farther than distance_thresh from
    # the principal axis until the inlier set stabilizes.
    for _ in range(max_iter):
        mean = np.mean(points, axis=0)
        cov = np.cov(points.T)
        eigvals, eigvecs = np.linalg.eig(cov)
        idx = np.argmax(eigvals)
        direction = eigvecs[:, idx]
        vx, vy = direction
        x0, y0 = mean
        dists = np.abs(vy*(points[:,0]-x0) - vx*(points[:,1]-y0)) / np.hypot(vx, vy)
        inliers = points[dists <= distance_thresh]
        if len(inliers) == len(points) or len(inliers) < 2:
            break
        points = inliers
    if len(points) < 2:
        return None
    # RANSAC refinement: fit y = kx + b on the surviving points.
    X = points[:, 0].reshape(-1, 1)
    y = points[:, 1]
    try:
        ransac = RANSACRegressor(residual_threshold=distance_thresh)
        ransac.fit(X, y)
        k = ransac.estimator_.coef_[0]
        b = ransac.estimator_.intercept_
        vx = 1 / np.sqrt(1 + k**2)
        vy = k / np.sqrt(1 + k**2)
        x0 = np.mean(points[:,0])
        y0 = k*x0 + b
    except Exception:
        # Fix: narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are not swallowed; the PCA fallback still covers any
        # RANSAC failure.
        mean = np.mean(points, axis=0)
        cov = np.cov(points.T)
        eigvals, eigvecs = np.linalg.eig(cov)
        idx = np.argmax(eigvals)
        direction = eigvecs[:, idx]
        vx, vy = direction
        x0, y0 = mean
    return vx, vy, x0, y0
# ---------------------------
# 封装函数:读取 ROI txt -> 拟合直线 -> 可视化
# ---------------------------
def fit_lines_from_image_txt(image_path, roi_txt_path, distance_thresh=5, draw_overlay=True):
    """
    Fit one edge line per ROI listed in a txt file.

    Inputs:
        image_path: path to the source image
        roi_txt_path: ROI txt file, one ``x,y,w,h`` per line
        distance_thresh: residual threshold for the line fit
        draw_overlay: whether to draw the fitted lines on a copy of the image
    Outputs:
        lines: per-ROI results ``[(vx, vy, x0, y0) or None, ...]``
        overlay_img: annotated image copy (None on failure or when
        draw_overlay is False)
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ 无法读取图片: {image_path}")
        return [], None
    rois = load_rois_from_txt(roi_txt_path)
    if not rois:
        print("❌ 没有有效 ROI")
        return [], None
    overlay_img = img.copy() if draw_overlay else None
    lines = []
    for idx, (x, y, w, h) in enumerate(rois):
        roi = img[y:y+h, x:x+w]
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 100, 200)
        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        # The longest open contour is taken as the physical edge.
        longest_contour = max(contours, key=lambda c: cv2.arcLength(c, closed=False), default=None)
        if longest_contour is not None and len(longest_contour) >= 2:
            points = longest_contour.reshape(-1, 2)
            line = fit_line_best(points, distance_thresh=distance_thresh, max_iter=5)
            if line:
                vx, vy, x0, y0 = line
                lines.append(line)
                if draw_overlay:
                    cols = gray.shape[1]
                    # Extend the fitted line across the ROI width.
                    # NOTE(review): vx == 0 (vertical line) would divide by
                    # zero here — confirm inputs cannot produce that case.
                    lefty = int(y0 - vy/vx * x0)
                    righty = int(y0 + vy/vx * (cols - x0))
                    # Draw on the full image: red fitted line, green contour.
                    cv2.line(overlay_img, (x, y + lefty), (x + cols - 1, y + righty), (0, 0, 255), 2)
                    cv2.drawContours(overlay_img, [longest_contour + np.array([x, y])], -1, (0, 255, 0), 1)
            else:
                lines.append(None)
        else:
            lines.append(None)
    return lines, overlay_img
# ---------------------------
# 使用示例
# ---------------------------
if __name__ == "__main__":
    # Usage example: fit edge lines for one frame and save the overlay.
    image_path = "../test_image/1.jpg"
    roi_txt_path = "../roi_coordinates/1_rois1.txt"
    lines, overlay = fit_lines_from_image_txt(image_path, roi_txt_path, distance_thresh=5)
    for idx, line in enumerate(lines):
        print(f"ROI {idx} 拟合直线: {line}")
    if overlay is not None:
        cv2.imwrite("overlay_result.jpg", overlay)
        print("✅ 原图叠加拟合直线已保存: overlay_result.jpg")

View File

@ -0,0 +1,161 @@
import os
import cv2
import numpy as np
from pathlib import Path
from ultralytics import YOLO
TARGET_SIZE = 640  # model input size
# --------------------
# Global ROI definitions
# --------------------
ROIS = [
    (859, 810, 696, 328),  # (x, y, w, h)
]
# --------------------
# 根据角点分布,选取左右边缘角点
# --------------------
def select_edge_corners(corners, w, left_ratio=0.2, right_ratio=0.2, y_var_thresh=5):
    """Pick corner points near the left and right edges of a width-``w`` ROI.

    Candidates within ``left_ratio`` (resp. ``right_ratio``) of the width are
    kept, then thinned to y-sorted neighbours whose vertical gap exceeds
    ``y_var_thresh``.  Returns ``(left_points, right_points)``; both are
    ``[]`` when *corners* is None.
    """
    if corners is None:
        return [], []
    pts = np.int32(corners).reshape(-1, 2)
    lo_bound = int(w * left_ratio)       # x_min is 0, so the left band is [0, lo_bound]
    hi_bound = w - int(w * right_ratio)  # right band is [hi_bound, w]

    def keep_varied(group):
        # Fewer than two points: nothing to thin.
        if len(group) < 2:
            return group
        ordered = group[np.argsort(group[:, 1])]
        gaps = np.abs(np.diff(ordered[:, 1]))
        jumps = np.where(gaps > y_var_thresh)[0]
        picked = [ordered[i] for i in jumps] + [ordered[i + 1] for i in jumps]
        return np.array(picked) if picked else ordered

    return (keep_varied(pts[pts[:, 0] <= lo_bound]),
            keep_varied(pts[pts[:, 0] >= hi_bound]))
# --------------------
# Fit a line and discard outlying points
# --------------------
def fit_line_with_outlier_removal(pts, dist_thresh=15):
    """Fit x = m*y + b to *pts*, refitting once after dropping outliers.

    Points whose horizontal distance to the first fit exceeds ``dist_thresh``
    (px) are discarded before the second fit.  Returns ``((m, b), inliers)``;
    ``(None, pts)`` when fewer than two points are supplied.
    """
    if pts is None or len(pts) < 2:
        return None, pts

    data = np.array(pts)
    xs = data[:, 0]
    ys = data[:, 1]

    # First least-squares pass; x as a function of y suits near-vertical edges.
    slope, intercept = np.polyfit(ys, xs, 1)
    residuals = np.abs(xs - (slope * ys + intercept))

    keep = residuals < dist_thresh
    xs_in = xs[keep]
    ys_in = ys[keep]
    if len(xs_in) < 2:
        # Too few inliers survived: fall back to the first fit over all points.
        return (slope, intercept), data

    slope, intercept = np.polyfit(ys_in, xs_in, 1)
    return (slope, intercept), np.stack([xs_in, ys_in], axis=1)
# --------------------
# Run inference per ROI and visualise mask + edge corners + fitted lines
# --------------------
def infer_mask_with_selected_corners(image_path, model_path, output_dir="./output"):
    """Segment each global ROI of *image_path* with a YOLO model, then draw the
    mask, the selected left/right edge corners and their fitted lines onto a
    copy of the original frame, saved under *output_dir*.

    Returns the annotated full-frame BGR image.
    """
    model = YOLO(model_path)
    image_path = Path(image_path)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    # NOTE(review): cv2.imread returns None for an unreadable path; the
    # .copy() below would then raise — confirm inputs always exist.
    orig_img = cv2.imread(str(image_path))
    overlay_img = orig_img.copy()
    for idx, (x, y, w, h) in enumerate(ROIS):
        roi_img = orig_img[y:y+h, x:x+w]
        resized_img = cv2.resize(roi_img, (TARGET_SIZE, TARGET_SIZE))
        # Model inference on the resized ROI crop
        results = model(source=resized_img, imgsz=TARGET_SIZE, verbose=False)
        result = results[0]
        # All corner/line work needs the first mask, so it stays in this branch.
        if result.masks is not None and len(result.masks.data) > 0:
            mask = result.masks.data[0].cpu().numpy()
            mask_bin = (mask > 0.5).astype(np.uint8)
            # Map the model-resolution mask back onto the ROI's native size.
            mask_bin = cv2.resize(mask_bin, (w, h), interpolation=cv2.INTER_NEAREST)
            # Semi-transparent green mask overlay
            color_mask = np.zeros_like(roi_img, dtype=np.uint8)
            color_mask[mask_bin == 1] = (0, 255, 0)
            overlay_img[y:y+h, x:x+w] = cv2.addWeighted(roi_img, 0.7, color_mask, 0.3, 0)
            # Corner detection on the binary mask
            mask_gray = (mask_bin * 255).astype(np.uint8)
            corners = cv2.goodFeaturesToTrack(mask_gray,
                                              maxCorners=200,
                                              qualityLevel=0.01,
                                              minDistance=5)
            # Pick corners close to the left/right ROI borders
            left_pts, right_pts = select_edge_corners(corners, w)
            # Fit one line per side, discarding outliers
            left_line, left_inliers = fit_line_with_outlier_removal(left_pts)
            right_line, right_inliers = fit_line_with_outlier_removal(right_pts)
            # Draw inlier corners (BGR: red = left, blue = right)
            for cx, cy in left_inliers:
                cv2.circle(overlay_img[y:y+h, x:x+w], (int(cx), int(cy)), 5, (0, 0, 255), -1)
            for cx, cy in right_inliers:
                cv2.circle(overlay_img[y:y+h, x:x+w], (int(cx), int(cy)), 5, (255, 0, 0), -1)
            # Draw fitted lines x = m*y + b evaluated at the ROI top/bottom
            if left_line is not None:
                m, b = left_line
                y1, y2 = 0, h
                x1, x2 = int(m * y1 + b), int(m * y2 + b)
                cv2.line(overlay_img[y:y+h, x:x+w], (x1, y1), (x2, y2), (0, 0, 255), 3)
            if right_line is not None:
                m, b = right_line
                y1, y2 = 0, h
                x1, x2 = int(m * y1 + b), int(m * y2 + b)
                cv2.line(overlay_img[y:y+h, x:x+w], (x1, y1), (x2, y2), (255, 0, 0), 3)
    # Save the annotated frame once all ROIs have been processed.
    save_path = output_dir / f"mask_edge_corners_{image_path.name}"
    cv2.imwrite(str(save_path), overlay_img)
    print(f"✅ 保存结果: {save_path}")
    return overlay_img
# ===================== Usage example =====================
if __name__ == "__main__":
    IMAGE_PATH = "../test_image/1.jpg"
    MODEL_PATH = "best.pt"  # trained YOLO segmentation weights
    infer_mask_with_selected_corners(IMAGE_PATH, MODEL_PATH)

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 851 KiB

View File

@ -0,0 +1,190 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import cv2
import numpy as np
from pathlib import Path
from ultralytics import YOLO
# --------------------
# Parameter settings (fixed in this script)
# --------------------
INPUT_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/test_l"  # image folder
MODEL_PATH = "best.pt"  # YOLO model weights
OUTPUT_DIR = "./output"  # where annotated results are written
TARGET_SIZE = 640  # YOLO input size (square)
DIST_THRESH = 15  # outlier-rejection threshold (px) for line fitting
MAX_CORNERS = 200  # goodFeaturesToTrack: maximum number of corners
QUALITY_LEVEL = 0.01  # goodFeaturesToTrack: qualityLevel
MIN_DISTANCE = 5  # goodFeaturesToTrack: minDistance
# Global ROI definitions (full-frame pixel coordinates)
ROIS = [
    (859, 810, 696, 328),  # (x, y, w, h)
]
# --------------------
# Left/right edge corner selection
# --------------------
def select_edge_corners(corners, w, left_ratio=0.2, right_ratio=0.2, y_var_thresh=5):
    """Partition corner candidates into left-edge and right-edge sets.

    A corner belongs to a side when its x lies within ``left_ratio`` /
    ``right_ratio`` of the ROI width from that border; each side is thinned to
    the endpoints of y-gaps larger than ``y_var_thresh`` and de-duplicated.
    Always returns two ``(N, 2)`` int32 arrays.
    """
    empty = np.zeros((0, 2), dtype=np.int32)
    if corners is None:
        return empty, empty

    pts = np.int32(corners).reshape(-1, 2)
    left_limit = 0 + int(w * left_ratio)
    right_limit = w - int(w * right_ratio)

    def _thin(group):
        # Fewer than two points: nothing to compare against.
        if len(group) < 2:
            return group.astype(np.int32)
        ordered = group[np.argsort(group[:, 1])]
        gaps = np.abs(np.diff(ordered[:, 1]))
        jumps = np.where(gaps > y_var_thresh)[0]
        if len(jumps) == 0:
            return ordered.astype(np.int32)
        # Keep both endpoints of every large y-gap, re-sort by y, then drop
        # duplicate rows while preserving the y-sorted order.
        chosen = np.array([ordered[i] for i in jumps] + [ordered[i + 1] for i in jumps])
        chosen = chosen[np.argsort(chosen[:, 1])]
        _, first_idx = np.unique(chosen.reshape(-1, 2), axis=0, return_index=True)
        chosen = chosen[np.sort(first_idx)]
        return chosen.astype(np.int32)

    return _thin(pts[pts[:, 0] <= left_limit]), _thin(pts[pts[:, 0] >= right_limit])
# --------------------
# Fit a line and discard outlying points
# --------------------
def fit_line_with_outlier_removal(pts, dist_thresh=DIST_THRESH):
    """Fit x = m*y + b to *pts* and refit once after removing outliers.

    Parameters:
        pts: sequence/array of (x, y) points.
        dist_thresh: max horizontal distance (px) from the first fit for a
            point to be kept as an inlier.

    Returns:
        ((m, b), inliers) where inliers is an (N, 2) int32 array; (None,
        empty array) when fewer than two points are given or the input is
        degenerate (e.g. all points share one y), in which case np.polyfit
        cannot produce a line.
    """
    if pts is None or len(pts) < 2:
        return None, np.zeros((0, 2), dtype=np.int32)
    pts = np.array(pts, dtype=np.float64)
    x = pts[:, 0]
    y = pts[:, 1]
    try:
        m, b = np.polyfit(y, x, 1)
    # Bare `except:` would also swallow SystemExit/KeyboardInterrupt; only
    # the failures np.polyfit actually raises are handled here.
    except (np.linalg.LinAlgError, ValueError, TypeError):
        return None, np.zeros((0, 2), dtype=np.int32)
    x_fit = m * y + b
    dists = np.abs(x - x_fit)
    mask = dists < dist_thresh
    if np.sum(mask) < 2:
        # Not enough inliers survived: keep the first fit, report all points.
        return (m, b), pts.astype(np.int32)
    x2, y2 = x[mask], y[mask]
    try:
        m2, b2 = np.polyfit(y2, x2, 1)
    except (np.linalg.LinAlgError, ValueError, TypeError):
        # Refit failed: fall back to the first fit over the surviving inliers.
        return (m, b), np.stack([x2, y2], axis=1).astype(np.int32)
    inliers = np.stack([x2, y2], axis=1).astype(np.int32)
    return (m2, b2), inliers
# --------------------
# Process one ROI of a single image
# --------------------
def process_roi_on_image(orig_img, roi):
    """Run the global MODEL on one ROI of *orig_img* and return the annotated
    ROI crop (mask overlay + edge corners + fitted lines), or None when the
    clamped ROI is empty.

    NOTE(review): relies on the module-level MODEL bound in __main__ —
    importing this module without running it as a script leaves MODEL unset.
    """
    rx, ry, rw, rh = roi
    h_img, w_img = orig_img.shape[:2]
    # Clamp the ROI to the image bounds.
    rx = max(0, rx); ry = max(0, ry)
    rw = min(rw, w_img - rx); rh = min(rh, h_img - ry)
    roi_img = orig_img[ry:ry+rh, rx:rx+rw].copy()
    if roi_img.size == 0:
        return None
    resized = cv2.resize(roi_img, (TARGET_SIZE, TARGET_SIZE))
    results = MODEL(resized, imgsz=TARGET_SIZE, verbose=False)
    result = results[0]
    overlay_roi = roi_img.copy()
    # No segmentation produced: return the plain crop unannotated.
    if result.masks is None or len(result.masks.data)==0:
        return overlay_roi
    mask = result.masks.data[0].cpu().numpy()
    mask_bin = (mask>0.5).astype(np.uint8)
    # Map the model-resolution mask back to the ROI's native size.
    mask_bin = cv2.resize(mask_bin,(rw,rh), interpolation=cv2.INTER_NEAREST)
    # Semi-transparent green mask overlay
    color_mask = np.zeros_like(overlay_roi, dtype=np.uint8)
    color_mask[mask_bin==1] = (0,255,0)
    overlay_roi = cv2.addWeighted(overlay_roi,0.7,color_mask,0.3,0)
    # Corner detection on the binary mask
    mask_gray = (mask_bin*255).astype(np.uint8)
    corners = cv2.goodFeaturesToTrack(mask_gray, maxCorners=MAX_CORNERS,
                                      qualityLevel=QUALITY_LEVEL, minDistance=MIN_DISTANCE)
    left_pts, right_pts = select_edge_corners(corners, rw)
    left_line, left_inliers = fit_line_with_outlier_removal(left_pts)
    right_line, right_inliers = fit_line_with_outlier_removal(right_pts)
    # Draw inliers (BGR: red = left, blue = right)
    for (cx,cy) in left_inliers:
        cv2.circle(overlay_roi,(int(cx),int(cy)),4,(0,0,255),-1)
    for (cx,cy) in right_inliers:
        cv2.circle(overlay_roi,(int(cx),int(cy)),4,(255,0,0),-1)
    # Draw fitted lines x = m*y + b across the ROI height
    if left_line is not None:
        m,b = left_line
        y1,y2 = 0, rh-1
        x1 = int(m*y1+b); x2 = int(m*y2+b)
        cv2.line(overlay_roi,(x1,y1),(x2,y2),(0,0,200),3)
    if right_line is not None:
        m,b = right_line
        y1,y2 = 0, rh-1
        x1 = int(m*y1+b); x2 = int(m*y2+b)
        cv2.line(overlay_roi,(x1,y1),(x2,y2),(200,0,0),3)
    return overlay_roi
# --------------------
# Batch inference over a folder
# --------------------
def infer_folder_images():
    """Run ROI inference on every image in INPUT_DIR and write annotated
    copies (the original frame with each processed ROI pasted back) to
    OUTPUT_DIR."""
    input_dir = Path(INPUT_DIR)
    output_dir = Path(OUTPUT_DIR)
    output_dir.mkdir(parents=True, exist_ok=True)
    exts = ('*.jpg','*.jpeg','*.png','*.bmp','*.tif','*.tiff')
    files = []
    for e in exts:
        files.extend(sorted(input_dir.glob(e)))
    if len(files)==0:
        print("未找到图片文件")
        return
    print(f"找到 {len(files)} 张图片,开始推理...")
    for img_path in files:
        print("-> 处理:", img_path.name)
        orig_img = cv2.imread(str(img_path))
        if orig_img is None:
            print(" 无法读取,跳过")
            continue
        out_img = orig_img.copy()
        for roi in ROIS:
            overlay_roi = process_roi_on_image(orig_img, roi)
            if overlay_roi is not None:
                rx,ry,rw,rh = roi
                # Paste back using the crop's actual size: the ROI may have
                # been clamped to the image bounds inside process_roi_on_image.
                h,w = overlay_roi.shape[:2]
                out_img[ry:ry+h, rx:rx+w] = overlay_roi
        save_path = output_dir / f"mask_edge_corners_{img_path.name}"
        cv2.imwrite(str(save_path), out_img)
        print(" 已保存 ->", save_path.name)
    print("批量推理完成,结果保存在:", output_dir)
# --------------------
# Entry point
# --------------------
if __name__ == "__main__":
    # MODEL is deliberately a module-level global: process_roi_on_image reads it.
    MODEL = YOLO(MODEL_PATH)
    infer_folder_images()

View File

@ -0,0 +1,130 @@
import os
import cv2
import numpy as np
from pathlib import Path
from ultralytics import YOLO
TARGET_SIZE = 640  # model input size (square) fed to YOLO
# --------------------
# Global ROI definitions
# --------------------
ROIS = [
    (859,810,696,328),  # (x, y, w, h) in full-frame pixel coordinates
]
# --------------------
# Drop points whose x jumps too far between y-neighbours
# --------------------
def filter_outliers_by_x(pts, x_thresh=30):
    """Remove points whose x differs by ``x_thresh`` or more from the previous
    point when walking the set in ascending-y order.

    The first point (smallest y) is always kept; inputs with fewer than two
    points are returned untouched.  Returns an (N, 2) int32 array otherwise.
    """
    if len(pts) < 2:
        return pts
    ordered = pts[np.argsort(pts[:, 1])]  # ascending y
    # Each point is compared to its y-predecessor in the ORIGINAL sorted
    # sequence, not to the last survivor.
    kept = [ordered[0]]
    kept.extend(
        ordered[i]
        for i in range(1, len(ordered))
        if abs(ordered[i, 0] - ordered[i - 1, 0]) < x_thresh
    )
    return np.array(kept, dtype=np.int32)
# --------------------
# Pick left/right edge corners from the corner distribution
# --------------------
def select_edge_corners(corners, w, left_ratio=0.2, right_ratio=0.2, y_var_thresh=5, x_var_thresh=30):
    """Split detected corners into left-edge and right-edge candidate sets.

    Corners within ``left_ratio`` / ``right_ratio`` of the ROI width from the
    respective border are kept, thinned to the endpoints of y-gaps larger
    than ``y_var_thresh``, then cleaned of points whose x jumps by
    ``x_var_thresh`` or more between y-neighbours.  Returns
    ``(left_points, right_points)``; ``([], [])`` when *corners* is None.
    """
    if corners is None:
        return [], []

    pts = np.int32(corners).reshape(-1, 2)
    left_limit = 0 + int(w * left_ratio)
    right_limit = w - int(w * right_ratio)

    def _thin_by_y(group):
        # Keep both endpoints of every y-gap above the threshold.
        if len(group) < 2:
            return group
        ordered = group[np.argsort(group[:, 1])]
        gaps = np.abs(np.diff(ordered[:, 1]))
        jumps = np.where(gaps > y_var_thresh)[0]
        picked = [ordered[i] for i in jumps] + [ordered[i + 1] for i in jumps]
        return np.array(picked) if picked else ordered

    left_set = _thin_by_y(pts[pts[:, 0] <= left_limit])
    right_set = _thin_by_y(pts[pts[:, 0] >= right_limit])
    # Second pass: discard points whose x jumps too far from their y-neighbour.
    return (filter_outliers_by_x(left_set, x_var_thresh),
            filter_outliers_by_x(right_set, x_var_thresh))
# --------------------
# Run inference per ROI and visualise mask + edge corners
# --------------------
def infer_mask_with_selected_corners(image_path, model_path, output_dir="./output"):
    """Segment each global ROI of *image_path* with a YOLO model and draw the
    mask plus the selected left (red) / right (blue) edge corners onto a copy
    of the frame, saved under *output_dir*.

    Returns the annotated full-frame BGR image.
    """
    model = YOLO(model_path)
    image_path = Path(image_path)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    # NOTE(review): cv2.imread returns None for an unreadable path; the
    # .copy() below would then raise — confirm inputs always exist.
    orig_img = cv2.imread(str(image_path))
    overlay_img = orig_img.copy()
    for idx, (x, y, w, h) in enumerate(ROIS):
        roi_img = orig_img[y:y+h, x:x+w]
        resized_img = cv2.resize(roi_img, (TARGET_SIZE, TARGET_SIZE))
        # Model inference on the resized ROI crop
        results = model(source=resized_img, imgsz=TARGET_SIZE, verbose=False)
        result = results[0]
        # All corner work needs the first mask, so it stays in this branch.
        if result.masks is not None and len(result.masks.data) > 0:
            mask = result.masks.data[0].cpu().numpy()
            mask_bin = (mask > 0.5).astype(np.uint8)
            # Map the model-resolution mask back onto the ROI's native size.
            mask_bin = cv2.resize(mask_bin, (w, h), interpolation=cv2.INTER_NEAREST)
            # Semi-transparent green mask overlay
            color_mask = np.zeros_like(roi_img, dtype=np.uint8)
            color_mask[mask_bin == 1] = (0, 255, 0)
            overlay_img[y:y+h, x:x+w] = cv2.addWeighted(roi_img, 0.7, color_mask, 0.3, 0)
            # Corner detection on the binary mask
            mask_gray = (mask_bin * 255).astype(np.uint8)
            corners = cv2.goodFeaturesToTrack(mask_gray,
                                              maxCorners=200,
                                              qualityLevel=0.01,
                                              minDistance=5)
            # Pick corners close to the left/right ROI borders
            left_pts, right_pts = select_edge_corners(corners, w)
            # Visualise (BGR: red = left, blue = right)
            for cx, cy in left_pts:
                cv2.circle(overlay_img[y:y+h, x:x+w], (cx, cy), 6, (0, 0, 255), -1)
            for cx, cy in right_pts:
                cv2.circle(overlay_img[y:y+h, x:x+w], (cx, cy), 6, (255, 0, 0), -1)
    # Save the annotated frame once all ROIs have been processed.
    save_path = output_dir / f"mask_edge_corners_{image_path.name}"
    cv2.imwrite(str(save_path), overlay_img)
    print(f"✅ 保存结果: {save_path}")
    return overlay_img
# ===================== Usage example =====================
if __name__ == "__main__":
    IMAGE_PATH = "../test_image/1.jpg"
    MODEL_PATH = "best.pt"  # trained YOLO segmentation weights
    infer_mask_with_selected_corners(IMAGE_PATH, MODEL_PATH)