三阶段投料修正

This commit is contained in:
cdeyw
2025-09-17 21:54:55 +08:00
parent b04d41fba8
commit a143504a90
20 changed files with 1226 additions and 260 deletions

BIN
src/vision/alig.pt Normal file

Binary file not shown.

View File

@ -0,0 +1,84 @@
import cv2
import os
import numpy as np
from ultralytics import YOLO
def predict_obb_best_angle(model=None, model_path=None, image_path=None, save_path=None):
    """Measure the angle between the two most confident oriented boxes.

    Args:
        model: Optional pre-loaded YOLO model instance. Used in preference to
            ``model_path`` when it is not None.
        model_path: Path to YOLO weights; only loaded when ``model`` is None.
        image_path: Path of the image to run inference on.
        save_path: Optional path where the annotated image is written.

    Returns:
        A tuple ``(angle_deg, annotated_img)``. ``angle_deg`` is the smallest
        angle in degrees (0-90) between the directions of the two
        highest-confidence boxes, or None when fewer than two boxes are
        detected. ``annotated_img`` is the image produced by ``result.plot()``.
        ``(None, None)`` is returned when the image cannot be read.

    Raises:
        ValueError: If neither ``model`` nor ``model_path`` is provided.
    """
    # 1. Use the pre-loaded model, or load one from the weights path.
    if model is not None:
        loaded_model = model
    elif model_path is not None:
        loaded_model = YOLO(model_path)
    else:
        raise ValueError("必须提供model或model_path参数")

    # 2. Read the image.
    img = cv2.imread(image_path)
    if img is None:
        print(f"无法读取图像: {image_path}")
        return None, None

    # 3. Run OBB inference.
    results = loaded_model(img, save=False, imgsz=640, conf=0.5, mode='obb')
    result = results[0]

    # 4. Visualise and optionally save.
    annotated_img = result.plot()
    if save_path:
        out_dir = os.path.dirname(save_path)
        # A bare file name has an empty dirname and os.makedirs("") raises,
        # so only create the directory when there is one.
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        # cv2.imwrite reports failure through its return value.
        if cv2.imwrite(save_path, annotated_img):
            print(f"推理结果已保存至: {save_path}")
        else:
            print(f"推理结果保存失败: {save_path}")

    # 5. Collect (confidence, direction) for every detected box.
    boxes = result.obb
    if boxes is None or len(boxes) < 2:
        print("检测到少于两个目标,无法计算夹角。")
        return None, annotated_img

    box_info = []
    for box in boxes:
        conf = box.conf.cpu().numpy()[0]
        cx, cy, w, h, r_rad = box.xywhr.cpu().numpy()[0]
        # Take the rotation as-is when w >= h, otherwise add 90 degrees, then
        # fold into [0, pi). Presumably this makes the direction follow the
        # longer side -- confirm against the xywhr convention of the model.
        direction = r_rad if w >= h else r_rad + np.pi / 2
        direction = direction % np.pi
        box_info.append((conf, direction))

    # 6. Keep the two boxes with the highest confidence.
    box_info.sort(key=lambda item: item[0], reverse=True)
    dir1, dir2 = box_info[0][1], box_info[1][1]

    # 7. Directions are undirected lines, so the angle is folded into 0-90 deg.
    diff = abs(dir1 - dir2)
    diff = min(diff, np.pi - diff)
    angle_deg = np.degrees(diff)
    print(f"置信度最高两个框主方向夹角: {angle_deg:.2f}°")
    return angle_deg, annotated_img
# ------------------- 测试 -------------------
# if __name__ == "__main__":
# weight_path = r'angle.pt'
# image_path = r"./test_image/3.jpg"
# save_path = "./inference_results/detected_3.jpg"
#
# #angle_deg, annotated_img = predict_obb_best_angle(weight_path, image_path, save_path)
# angle_deg,_ = predict_obb_best_angle(model_path=weight_path, image_path=image_path, save_path=save_path)
# annotated_img = None
# print(angle_deg)
# if annotated_img is not None:
# cv2.imshow("YOLO OBB Prediction", annotated_img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

BIN
src/vision/angle.pt Normal file

Binary file not shown.

BIN
src/vision/overflow.pt Normal file

Binary file not shown.

106
src/vision/resize_main.py Normal file
View File

@ -0,0 +1,106 @@
import os
import shutil
from pathlib import Path
from ultralytics import YOLO
import cv2
# ---------------------------
# ROI 裁剪函数
# ---------------------------
def load_global_rois(txt_path):
    """Load ROI rectangles from a text file with one ``x,y,w,h`` line per ROI.

    Blank lines are ignored. Lines that cannot be parsed as four
    comma-separated integers are reported and skipped.

    Args:
        txt_path: Path of the ROI text file.

    Returns:
        A list of ``(x, y, w, h)`` integer tuples; empty when the file does
        not exist or holds no valid line.
    """
    if not os.path.exists(txt_path):
        print(f"❌ ROI 文件不存在: {txt_path}")
        return []

    parsed = []
    with open(txt_path, 'r') as handle:
        for raw in handle:
            entry = raw.strip()
            if not entry:
                continue
            try:
                x, y, w, h = (int(field) for field in entry.split(','))
            except Exception as err:
                print(f"⚠️ 无法解析 ROI 行: {entry}, 错误: {err}")
                continue
            parsed.append((x, y, w, h))
            print(f"📌 加载 ROI: (x={x}, y={y}, w={w}, h={h})")
    return parsed
def crop_and_resize(img, rois, target_size=640):
    """Crop every ROI out of ``img`` and resize it to a square.

    ROIs with a non-positive width or height, or that reach outside the
    image, are reported and skipped.

    Args:
        img: Image array; only ``img.shape[:2]`` (height, width) and slicing
            are used here.
        rois: Iterable of ``(x, y, w, h)`` rectangles.
        target_size: Side length of the resized square crop.

    Returns:
        A list of ``(resized_crop, roi_index)`` tuples, where ``roi_index`` is
        the position of the ROI in ``rois``.
    """
    crops = []
    # The image size does not change between ROIs, so read it once.
    h_img, w_img = img.shape[:2]
    for i, (x, y, w, h) in enumerate(rois):
        # A zero or negative extent yields an empty slice, which makes
        # cv2.resize raise; reject it before cropping.
        if w <= 0 or h <= 0:
            print(f"⚠️ ROI 尺寸无效,跳过: {x},{y},{w},{h}")
            continue
        if x < 0 or y < 0 or x + w > w_img or y + h > h_img:
            print(f"⚠️ ROI 越界,跳过: {x},{y},{w},{h}")
            continue
        roi_img = img[y:y + h, x:x + w]
        roi_resized = cv2.resize(roi_img, (target_size, target_size), interpolation=cv2.INTER_AREA)
        crops.append((roi_resized, i))
    return crops
# ---------------------------
# 分类函数
# ---------------------------
def classify_and_save_images(model_path, input_folder, output_root, roi_file, target_size=640):
    """Classify ROI crops of every image in a folder and sort them by class.

    Each image in ``input_folder`` is cropped with the ROIs from ``roi_file``,
    every crop is classified with the YOLO model, and the crop is written to
    ``output_root/class<id>/``.

    Args:
        model_path: Path to the YOLO classification weights.
        input_folder: Folder scanned (non-recursively) for images.
        output_root: Folder under which the ``class<id>`` folders are created.
        roi_file: Text file with one ``x,y,w,h`` ROI per line.
        target_size: Side length the crops are resized to before inference.

    Returns:
        None. Progress and errors are printed.
    """
    model = YOLO(model_path)

    # Make sure the output root exists.
    output_root = Path(output_root)
    output_root.mkdir(parents=True, exist_ok=True)

    # Pre-create class0..class4 (the five classes this script assumes).
    class_dirs = {}
    for i in range(5):
        class_dir = output_root / f"class{i}"
        class_dir.mkdir(exist_ok=True)
        class_dirs[i] = class_dir

    rois = load_global_rois(roi_file)
    if not rois:
        print("❌ 没有有效 ROI退出")
        return

    image_suffixes = {'.jpg', '.jpeg', '.png', '.bmp', '.tif'}
    for img_path in Path(input_folder).glob("*.*"):
        if img_path.suffix.lower() not in image_suffixes:
            continue
        try:
            img = cv2.imread(str(img_path))
            if img is None:
                print(f"❌ 无法读取图像: {img_path}")
                continue

            crops = crop_and_resize(img, rois, target_size)
            for roi_img, roi_idx in crops:
                results = model(roi_img)
                pred = results[0].probs.data  # class probability distribution
                class_id = int(pred.argmax())

                # A model with more than five classes would otherwise index
                # past the pre-created folders; create the folder on demand.
                class_dir = class_dirs.get(class_id)
                if class_dir is None:
                    class_dir = output_root / f"class{class_id}"
                    class_dir.mkdir(exist_ok=True)
                    class_dirs[class_id] = class_dir

                # Only tag the file name with the ROI index when one image
                # produced several crops.
                suffix = f"_roi{roi_idx}" if len(crops) > 1 else ""
                dst_path = class_dir / f"{img_path.stem}{suffix}{img_path.suffix}"
                # Older OpenCV builds only accept str file names, and imwrite
                # signals failure through its return value.
                if not cv2.imwrite(str(dst_path), roi_img):
                    print(f"❌ 保存失败: {dst_path}")
                    continue
                print(f"Processed {img_path.name}{suffix} -> Class {class_id}")
        except Exception as e:
            # Best effort: one bad image must not stop the whole batch.
            print(f"Error processing {img_path.name}: {str(e)}")
# ---------------------------
# 主程序
# ---------------------------
if __name__ == "__main__":
    # Sort the images of one dataset folder into class folders, using the ROI
    # file that was used for training.
    classify_and_save_images(
        model_path=r"overflow.pt",
        input_folder="/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/f6",
        output_root="/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/class111",
        roi_file="./roi_coordinates/1_rois.txt",
        target_size=640,
    )

View File

@ -0,0 +1,184 @@
import os
from pathlib import Path
import cv2
import numpy as np
from ultralytics import YOLO
# ---------------------------
# 类别映射
# ---------------------------
# Display label for each class id; the id is the position in the tuple below.
CLASS_NAMES = dict(enumerate((
    "未堆料",    # 0: no material piled
    "小堆料",    # 1: small pile
    "大堆料",    # 2: large pile
    "未浇筑满",  # 3: not fully poured
    "浇筑满",    # 4: fully poured
)))
# ---------------------------
# 加载 ROI 列表
# ---------------------------
def load_global_rois(txt_path):
    """Load ROI rectangles from a text file with one ``x,y,w,h`` line per ROI.

    Blank lines are ignored. Lines that cannot be parsed as four
    comma-separated integers are reported and skipped.

    Args:
        txt_path: Path of the ROI text file.

    Returns:
        A list of ``(x, y, w, h)`` integer tuples; empty when the file does
        not exist or holds no valid line.
    """
    if not os.path.exists(txt_path):
        print(f"ROI 文件不存在: {txt_path}")
        return []

    parsed = []
    with open(txt_path, 'r') as handle:
        for raw in handle:
            entry = raw.strip()
            if not entry:
                continue
            try:
                x, y, w, h = (int(field) for field in entry.split(','))
            except Exception as err:
                print(f"无法解析 ROI 行 '{entry}': {err}")
                continue
            parsed.append((x, y, w, h))
    return parsed
# ---------------------------
# 裁剪并 resize ROI
# ---------------------------
def crop_and_resize(img, rois, target_size=640):
    """Crop every ROI out of ``img`` and resize it to a square.

    ROIs with a non-positive width or height, or that reach outside the
    image, are skipped silently.

    Args:
        img: Image array; only ``img.shape[:2]`` (height, width) and slicing
            are used here.
        rois: Iterable of ``(x, y, w, h)`` rectangles.
        target_size: Side length of the resized square crop.

    Returns:
        A list of ``(resized_crop, roi_index)`` tuples, where ``roi_index`` is
        the position of the ROI in ``rois``.
    """
    crops = []
    h_img, w_img = img.shape[:2]
    for i, (x, y, w, h) in enumerate(rois):
        # A zero or negative extent yields an empty slice, which makes
        # cv2.resize raise; treat it like an out-of-bounds ROI.
        if w <= 0 or h <= 0:
            continue
        if x < 0 or y < 0 or x + w > w_img or y + h > h_img:
            continue
        roi = img[y:y + h, x:x + w]
        roi_resized = cv2.resize(roi, (target_size, target_size), interpolation=cv2.INTER_AREA)
        crops.append((roi_resized, i))
    return crops
# ---------------------------
# class1/class2 加权判断
# ---------------------------
def weighted_small_large(pred_probs, threshold=0.4, w1=0.3, w2=0.7):
    """Decide between the small-pile and large-pile labels with a weighted score.

    The score is the weighted mean of the class-1 and class-2 probabilities,
    normalised by their sum; it is 0.0 when that sum is not positive.

    Args:
        pred_probs: Indexable of class probabilities; indices 1 and 2 are read.
        threshold: Scores at or above this value give the large-pile label.
        w1: Weight applied to the class-1 probability.
        w2: Weight applied to the class-2 probability.

    Returns:
        A tuple ``(label, score, p1, p2)`` with the chosen label string, the
        score, and the class-1 and class-2 probabilities as floats.
    """
    p1, p2 = float(pred_probs[1]), float(pred_probs[2])
    total = p1 + p2

    # Guard the division: with no mass on either class the score is 0.0.
    score = (w1 * p1 + w2 * p2) / total if total > 0 else 0.0

    if score >= threshold:
        label = "大堆料"
    else:
        label = "小堆料"
    return label, score, p1, p2
# ---------------------------
# 单张图片推理函数
# ---------------------------
def classify_image_weighted(image, model, threshold=0.4):
    """Classify one image, resolving class 1 vs class 2 with a weighted score.

    Args:
        image: Image passed straight to ``model``.
        model: Callable model whose first result exposes ``probs.data``.
        threshold: Threshold forwarded to ``weighted_small_large``.

    Returns:
        A tuple ``(final_class, score, p1, p2)``. When the top class is 1 or 2
        this is the result of ``weighted_small_large``. Otherwise
        ``final_class`` is the label from ``CLASS_NAMES`` (or an
        unknown-class placeholder) and ``score`` is the top-class
        probability. ``p1`` and ``p2`` are the class-1 and class-2
        probabilities.
    """
    probs = model(image)[0].probs.data.cpu().numpy().flatten()
    top_id = int(probs.argmax())

    # Classes 1 and 2 are decided by the weighted score, not by argmax alone.
    if top_id in (1, 2):
        return weighted_small_large(probs, threshold=threshold)

    label = CLASS_NAMES.get(top_id, f"未知类别({top_id})")
    return label, float(probs[top_id]), float(probs[1]), float(probs[2])
# ---------------------------
# 实时视频流推理函数
# ---------------------------
def real_time_inference(rtsp_url, model_path, roi_file, target_size=640, threshold=0.4):
    """Classify ROI crops of an RTSP stream frame by frame.

    Runs until a frame cannot be read or 'q' is pressed in an OpenCV window;
    's' saves the current frame as ``frame_<n>.jpg`` in the working directory.

    Args:
        rtsp_url: URL of the RTSP stream.
        model_path: Path to the YOLO classification weights.
        roi_file: Text file with one ``x,y,w,h`` ROI per line.
        target_size: Side length the crops are resized to before inference.
        threshold: Threshold forwarded to ``classify_image_weighted``.

    Returns:
        None. Results and errors are printed.
    """
    model = YOLO(model_path)

    rois = load_global_rois(roi_file)
    if not rois:
        print("❌ 没有有效 ROI退出")
        return

    cap = cv2.VideoCapture(rtsp_url)
    if not cap.isOpened():
        print(f"❌ 无法打开视频流: {rtsp_url}")
        cap.release()
        return

    # NOTE(review): these messages echo the full URL, including any embedded
    # credentials, to the console.
    print(f"✅ 成功连接到视频流: {rtsp_url}")
    print("'q' 键退出,按 's' 键保存当前帧")

    # Labels that count as overflow. Exact matching is required: a substring
    # test for "浇筑满" would also match "未浇筑满" (not fully poured).
    overflow_labels = ("大堆料", "浇筑满")

    frame_count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("❌ 无法读取帧,可能连接已断开")
                break

            frame_count += 1
            print(f"\n处理第 {frame_count}")

            try:
                crops = crop_and_resize(frame, rois, target_size)
                for roi_resized, roi_idx in crops:
                    final_class, score, p1, p2 = classify_image_weighted(roi_resized, model, threshold=threshold)
                    print(f"ROI {roi_idx} -> 类别: {final_class}, 加权分数: {score:.2f}, "
                          f"class1 置信度: {p1:.2f}, class2 置信度: {p2:.2f}")

                    if final_class in overflow_labels:
                        print(f"🚨 检测到溢料: ROI {roi_idx} - {final_class}")

                    # Optional visualisation of the crop.
                    cv2.imshow(f'ROI {roi_idx}', roi_resized)

                cv2.imshow('Original Frame', frame)
            except Exception as e:
                # Report and fall through to the key handling below, so 'q'
                # still works while frames keep failing.
                print(f"处理帧时出错: {e}")

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            elif key == ord('s'):
                cv2.imwrite(f"frame_{frame_count}.jpg", frame)
                print(f"保存帧到 frame_{frame_count}.jpg")
    finally:
        # Release the stream and windows even when the loop exits through an
        # exception such as KeyboardInterrupt.
        cap.release()
        cv2.destroyAllWindows()
    print("✅ 视频流处理结束")
# ---------------------------
# 主函数 - 实时推理示例
# ---------------------------
if __name__ == "__main__":
    # RTSP stream URL. The RTSP_URL environment variable overrides the
    # built-in default.
    # NOTE(review): the fallback embeds camera credentials in source control;
    # move them to the environment or a config file and rotate the password.
    rtsp_url = os.environ.get(
        "RTSP_URL",
        "rtsp://admin:XJ123456@192.168.1.51:554/streaming/channels/101",
    )

    # Inference settings.
    model_path = r"overflow.pt"
    roi_file = r"./roi_coordinates/1_rois.txt"
    target_size = 640
    threshold = 0.4

    print("开始实时视频流推理...")
    real_time_inference(rtsp_url, model_path, roi_file, target_size, threshold)

View File

@ -0,0 +1 @@
859,810,696,328

BIN
src/vision/test_image/1.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 587 KiB

BIN
src/vision/test_image/2.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 513 KiB

BIN
src/vision/test_image/3.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.9 MiB