first commit

This commit is contained in:
琉璃月光
2025-10-21 11:07:29 +08:00
commit c134abf749
152 changed files with 5400 additions and 0 deletions

25
ailai_pc/1.py Normal file
View File

@ -0,0 +1,25 @@
from ultralytics import YOLO
import cv2
import torch

# Load the trained pose/keypoint weights.
model = YOLO('best.pt')

# Read one real image from disk (replace with your own path).
img_path = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/point2/train/1.jpg'
image = cv2.imread(img_path)

# Convert BGR -> RGB and scale to the model's 640x640 input.
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image_resized = cv2.resize(image_rgb, (640, 640))

# Run inference on the preprocessed frame.
results = model(image_resized)

# Report the keypoint tensor shape and a sample, if any keypoints exist.
first_result = results[0] if len(results) > 0 else None
keypoints = getattr(first_result, 'keypoints', None)
if keypoints is not None:
    print("Keypoints data shape:", keypoints.data.shape)
    if keypoints.data.shape[0] > 0:
        print("Keypoints data sample:", keypoints.data[0, :12])
else:
    print("No keypoints detected or invalid keypoints data.")

42
ailai_pc/1.xml Normal file
View File

@ -0,0 +1,42 @@
<annotation verified="no">
<folder>train</folder>
<filename>1.jpg</filename>
<path>/home/hx/桌面/ailai_test/train/1.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>1280</width>
<height>720</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<type>robndbox</type>
<name>ban</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<robndbox>
<cx>630.9352</cx>
<cy>227.4174</cy>
<w>538.9691</w>
<h>244.294</h>
<angle>3.071593</angle>
</robndbox>
</object>
<object>
<type>robndbox</type>
<name>bag</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<robndbox>
<cx>655.631</cx>
<cy>218.0273</cy>
<w>282.1343</w>
<h>179.1767</h>
<angle>3.101593</angle>
</robndbox>
</object>
</annotation>

145
ailai_pc/angle.py Normal file
View File

@ -0,0 +1,145 @@
from ultralytics import YOLO
import cv2
import os
import numpy as np
# Class names — order must match training, i.e. the model's class_id mapping.
CLASS_NAMES = ['ban', 'bag']
COLORS = [(0, 255, 0), (255, 0, 0)]  # BGR: ban = green, bag = blue
def get_best_angles_per_class(image_path, weight_path, return_degree=False):
    """Return, per class, the rotation angle of the highest-confidence OBB detection.

    Args:
        image_path: path of the image to run inference on.
        weight_path: path to the YOLO OBB weights.
        return_degree: if True return angles in degrees, otherwise radians.

    Returns:
        dict mapping class name -> best angle (float), or None when the class
        was not detected (or the image could not be read).
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ 无法读取图像:{image_path}")
        return {cls: None for cls in CLASS_NAMES}
    model = YOLO(weight_path)
    results = model(img, save=False, imgsz=640, conf=0.15, task='obb')
    result = results[0]
    boxes = result.obb
    if boxes is None or len(boxes) == 0:
        print("⚠️ 未检测到任何目标。")
        return {cls: None for cls in CLASS_NAMES}
    # Pull detections to the CPU as plain numpy arrays.
    xywhr = boxes.xywhr.cpu().numpy()  # (N, 5) -> cx, cy, w, h, r (radians)
    confs = boxes.conf.cpu().numpy()  # (N,)
    class_ids = boxes.cls.cpu().numpy().astype(int)  # (N,)
    # Result dict defaults every class to "not found".
    best_angles = {cls: None for cls in CLASS_NAMES}
    # For each class keep only the highest-confidence detection.
    for class_id, class_name in enumerate(CLASS_NAMES):
        mask = (class_ids == class_id)
        if not np.any(mask):
            print(f"🟡 未检测到类别: {class_name}")
            continue
        # argmax over the masked subset, then map back to the global index.
        idx_in_class = np.argmax(confs[mask])
        global_idx = np.where(mask)[0][idx_in_class]
        angle_rad = xywhr[global_idx][4]
        best_angles[class_name] = np.degrees(angle_rad) if return_degree else angle_rad
    return best_angles
def save_obb_visual(image_path, weight_path, save_path):
    """Run OBB detection and save an annotated copy of the image.

    Draws every rotated box with its class name, confidence and rotation
    angle (degrees). When nothing is detected the original image is saved
    unchanged so the output path always exists.

    Args:
        image_path: path of the image to annotate.
        weight_path: path to the YOLO OBB weights.
        save_path: where to write the annotated image (directories created).
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ 无法读取图像:{image_path}")
        return
    model = YOLO(weight_path)
    results = model(img, save=False, imgsz=640, conf=0.05, task='obb')
    result = results[0]
    boxes = result.obb
    if boxes is None or len(boxes) == 0:
        print("⚠️ 未检测到任何目标。")
        # Still save the unmodified image.
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        cv2.imwrite(save_path, img)
        return
    # Detections as numpy arrays.
    xywhr = boxes.xywhr.cpu().numpy()
    confs = boxes.conf.cpu().numpy()
    class_ids = boxes.cls.cpu().numpy().astype(int)
    # Draw on a copy so the original stays untouched.
    annotated_img = img.copy()
    for i in range(len(boxes)):
        cx, cy, w, h, r = xywhr[i]
        angle_deg = np.degrees(r)
        class_id = class_ids[i]
        class_name = CLASS_NAMES[class_id] if class_id < len(CLASS_NAMES) else f"cls{class_id}"
        conf = confs[i]
        color = COLORS[class_id % len(COLORS)] if class_id < len(CLASS_NAMES) else (128, 128, 128)
        # Rotated rect -> 4 corner points (cv2.boxPoints expects degrees).
        rect = ((cx, cy), (w, h), angle_deg)
        box_pts = cv2.boxPoints(rect).astype(int)
        cv2.polylines(annotated_img, [box_pts], isClosed=True, color=color, thickness=2)
        # Label text: class + confidence + angle.
        text = f"{class_name} {conf:.2f} {angle_deg:.1f}°"
        font_scale = 0.7
        thickness = 2
        text_size, _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
        # Filled background rectangle behind the label.
        cv2.rectangle(annotated_img,
                      (box_pts[0][0], box_pts[0][1] - text_size[1] - 8),
                      (box_pts[0][0] + text_size[0], box_pts[0][1] + 2),
                      color, -1)
        # White label text on top of the background.
        cv2.putText(annotated_img, text,
                    (box_pts[0][0], box_pts[0][1] - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), thickness)
    # Save the annotated image.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    cv2.imwrite(save_path, annotated_img)
    print(f"✅ 检测结果已保存至: {save_path}")
# ===============================
# Example usage
# ===============================
if __name__ == "__main__":
    weight = r"/home/hx/yolo/ultralytics_yolo11-main/runs/train/exp_obb3/weights/best.pt"
    image = r"/home/hx/yolo/output_masks/2.jpg"
    save_path = "./inference_results/visualized_2.jpg"
    # Best angle per class, reported in degrees.
    angles_deg = get_best_angles_per_class(image, weight, return_degree=True)
    print("\n🎯 各类别最佳旋转角度(度):")
    for cls_name, angle in angles_deg.items():
        if angle is not None:
            print(f"  {cls_name}: {angle:.2f}°")
        else:
            print(f"  {cls_name}: 未检测到")
    # Visualize every detection and save the annotated image.
    save_obb_visual(image, weight, save_path)

210
ailai_pc/angle_and_diff.py Normal file
View File

@ -0,0 +1,210 @@
from ultralytics import YOLO
import cv2
import numpy as np
import os
# ================================
# Configuration (all fixed parameters live here)
# ================================
# --- Model and classes ---
CLASS_NAMES = ['ban', 'bag']  # class names, order must match training
WEIGHT_PATH = r"/home/hx/yolo/ultralytics_yolo11-main/runs/train/exp_obb_ailai2/weights/best.pt"
IMAGE_PATH = r"/home/hx/ailai_image_obb/ailai_obb/camera01/1.jpg"
# --- Output locations ---
RESULT_DIR = "./inference_results"
os.makedirs(RESULT_DIR, exist_ok=True)  # make sure the directory exists
RESULT_IMAGE_PATH = os.path.join(RESULT_DIR, os.path.basename(IMAGE_PATH))  # annotated image path
FIXED_BAG_CENTER_FILE = os.path.join(RESULT_DIR, "bag_center.txt")  # fixed reference point, read-only
# --- Real physical size of 'ban' (millimetres) ---
BAN_REAL_WIDTH_MM = 890  # real width, mm
BAN_REAL_HEIGHT_MM = 1990  # real height, mm
# --- Average pixel size of 'ban' in the image (stable value from the detector) ---
BAN_PIXEL_WIDTH_PX = 536.35  # average detected width (px)
BAN_PIXEL_HEIGHT_PX = 242.83  # average detected height (px)
# --- Scale factors (mm per pixel) derived from the calibration above ---
SCALE_X = BAN_REAL_WIDTH_MM / BAN_PIXEL_WIDTH_PX  # mm/px
SCALE_Y = BAN_REAL_HEIGHT_MM / BAN_PIXEL_HEIGHT_PX  # mm/px
print(f"标定信息 1 像素 = {SCALE_X:.3f} mm (水平) {SCALE_Y:.3f} mm (垂直)")
# --- Fixed reference bag angle (degrees) ---
FIXED_BAG_ANGLE_DEG = 0.0
# ================================
# 工具函数
# ================================
def pixels_to_physical(dx_px, dy_px, scale_x=None, scale_y=None):
    """
    Convert a pixel offset into a physical offset in millimetres.

    Args:
        dx_px: horizontal offset in pixels.
        dy_px: vertical offset in pixels.
        scale_x: mm-per-pixel factor for x; defaults to the calibrated SCALE_X.
        scale_y: mm-per-pixel factor for y; defaults to the calibrated SCALE_Y.

    Returns:
        (dx_mm, dy_mm): offsets in millimetres.
    """
    # Fall back to the module-level calibration so existing callers behave
    # exactly as before; explicit factors make the function reusable/testable.
    if scale_x is None:
        scale_x = SCALE_X
    if scale_y is None:
        scale_y = SCALE_Y
    dx_mm = dx_px * scale_x
    dy_mm = dy_px * scale_y
    return dx_mm, dy_mm
def load_center_from_txt(txt_path):
    """Read a reference centre point (cx, cy) from the first line of a text file.

    Returns (x, y) as floats, or None when the file is missing, empty, or
    cannot be parsed as exactly two numbers.
    """
    if not os.path.exists(txt_path):
        print(f"文件不存在 {txt_path}")
        return None
    try:
        with open(txt_path, "r") as fh:
            first_line = fh.readline().strip()
        if not first_line:
            return None
        # Unpacking enforces exactly two tokens; anything else raises and
        # falls through to the error message below.
        x_val, y_val = [float(tok) for tok in first_line.split()]
        return (x_val, y_val)
    except Exception as exc:
        print(f"读取文件失败 {txt_path} {exc}")
        return None
def get_angles_and_centers(image_path, weight_path):
    """Detect each class and return its best OBB angle/centre plus the image.

    Returns:
        (angles_and_centers, img): angles_and_centers maps class name -> dict
        with 'angle' (degrees), 'center' (cx, cy), 'width', 'height' and
        'box_points' (4 corners) of the highest-confidence detection, or None
        when the class was not found; img is the loaded BGR image (None when
        the file could not be read).
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"无法读取图像 {image_path}")
        return {cls: None for cls in CLASS_NAMES}, None
    model = YOLO(weight_path)
    results = model(img, imgsz=640, conf=0.15, task='obb')
    result = results[0]
    boxes = result.obb
    if boxes is None or len(boxes) == 0:
        print("未检测到任何目标")
        return {cls: None for cls in CLASS_NAMES}, img
    # Detections as numpy arrays.
    xywhr = boxes.xywhr.cpu().numpy()
    confs = boxes.conf.cpu().numpy()
    class_ids = boxes.cls.cpu().numpy().astype(int)
    angles_and_centers = {}
    for class_id, class_name in enumerate(CLASS_NAMES):
        mask = (class_ids == class_id)
        if not np.any(mask):
            print(f"未检测到类别 {class_name}")
            angles_and_centers[class_name] = None
            continue
        # Highest-confidence detection of this class.
        idx_in_class = np.argmax(confs[mask])
        global_idx = np.where(mask)[0][idx_in_class]
        cx, cy, w, h, r = xywhr[global_idx]
        angle_deg = np.degrees(r)
        angles_and_centers[class_name] = {
            'angle': angle_deg,
            'center': (float(cx), float(cy)),
            'width': float(w),
            'height': float(h),
            'box_points': cv2.boxPoints(((cx, cy), (w, h), angle_deg))  # 4 corner points
        }
    return angles_and_centers, img
def draw_results_and_save(image, angles_and_centers, fixed_bag_center, save_path):
    """Draw rotated boxes, centres, labels and the offset vector; save the image.

    Args:
        image: BGR image the detections refer to.
        angles_and_centers: per-class dict from get_angles_and_centers().
        fixed_bag_center: (x, y) reference point or None; when present together
            with a current 'bag' detection, an arrow and the mm offset are drawn.
        save_path: output file path.
    """
    vis_img = image.copy()
    # Per-class drawing colours (BGR).
    colors = {
        'ban': (0, 255, 0),  # green
        'bag': (255, 0, 0)  # blue
    }
    # Draw every detected class.
    for cls_name, info in angles_and_centers.items():
        if info is None:
            continue
        # Rotated box outline.
        box_pts = np.int32(info['box_points'])
        cv2.drawContours(vis_img, [box_pts], 0, colors[cls_name], 2)
        # Centre point (filled red circle).
        cx, cy = int(info['center'][0]), int(info['center'][1])
        cv2.circle(vis_img, (cx, cy), 5, (0, 0, 255), -1)
        # Class label with the detected angle.
        label = f"{cls_name} {info['angle']:.1f}°"
        cv2.putText(vis_img, label, (cx - 30, cy - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, colors[cls_name], 2)
    # Reference point and offset vector (only when both ends are known).
    current_bag = angles_and_centers.get('bag')
    if fixed_bag_center and current_bag:
        fx, fy = int(fixed_bag_center[0]), int(fixed_bag_center[1])
        cx, cy = int(current_bag['center'][0]), int(current_bag['center'][1])
        # Fixed reference centre (hollow cyan circle).
        cv2.circle(vis_img, (fx, fy), 7, (255, 255, 0), 2)
        # Arrow from the fixed point to the current bag centre (offset vector).
        cv2.arrowedLine(vis_img, (fx, fy), (cx, cy), (0, 255, 255), 2, tipLength=0.05)
        # Offset annotation in millimetres.
        dx_px = cx - fx
        dy_px = cy - fy
        dx_mm, dy_mm = pixels_to_physical(dx_px, dy_px)
        offset_text = f"Δx={dx_mm:.1f}mm Δy={dy_mm:.1f}mm"
        cv2.putText(vis_img, offset_text, (fx + 10, fy + 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
    # Save the visualization.
    cv2.imwrite(save_path, vis_img)
    print(f"\n✅ 可视化结果已保存至: {save_path}")
# ===============================
# Main program
# ===============================
if __name__ == "__main__":
    # 1. Run detection on the configured image.
    angles_and_centers, img = get_angles_and_centers(IMAGE_PATH, WEIGHT_PATH)
    if img is None:
        exit(1)
    print("\n各类别旋转角度 中心点")
    for cls_name, info in angles_and_centers.items():
        if info is not None:
            print(f"  {cls_name} 角度 {info['angle']:.2f} 中心点 {info['center'][0]:.2f} {info['center'][1]:.2f} 尺寸 {info['width']:.2f} {info['height']:.2f} px")
        else:
            print(f"  {cls_name} 未检测到")
    # 2. Load the fixed reference bag centre from disk.
    fixed_bag_center = load_center_from_txt(FIXED_BAG_CENTER_FILE)
    print(f"\n固定参考 bag 中心点 {fixed_bag_center}")
    # 3. Pixel offset between current and reference bag centre.
    current_bag = angles_and_centers.get('bag')
    if current_bag and current_bag['center'] and fixed_bag_center:
        dx_px = current_bag['center'][0] - fixed_bag_center[0]
        dy_px = current_bag['center'][1] - fixed_bag_center[1]
        print(f"\n像素偏移量")
        print(f"  Δx {dx_px:.2f} px Δy {dy_px:.2f} px")
        # Convert to millimetres using the ban-based calibration.
        dx_mm, dy_mm = pixels_to_physical(dx_px, dy_px)
        print(f"\n物理偏移量 基于 ban 尺寸标定")
        print(f"  ΔX {dx_mm:.2f} mm ΔY {dy_mm:.2f} mm")
        # Angle offset relative to the fixed reference angle.
        current_bag_angle = current_bag['angle']
        angle_diff = current_bag_angle - FIXED_BAG_ANGLE_DEG
        print(f"\n角度偏移量")
        print(f"  当前 bag 角度 {current_bag_angle:.2f}")
        print(f"  固定 bag 角度 {FIXED_BAG_ANGLE_DEG}")
        print(f"  角度差 {angle_diff:.2f}")
    else:
        print(f"\n偏移量计算失败 数据缺失")
        if not current_bag:
            print("  未检测到 bag")
        if not fixed_bag_center:
            print("  固定参考点文件为空或不存在")
    # 4. Draw and save the visualization.
    draw_results_and_save(img, angles_and_centers, fixed_bag_center, RESULT_IMAGE_PATH)

3466
ailai_pc/annotations.xml Normal file

File diff suppressed because it is too large Load Diff

171
ailai_pc/bag_bushu.py Normal file
View File

@ -0,0 +1,171 @@
import cv2
import numpy as np
import math
from shapely.geometry import Polygon
from rknnlite.api import RKNNLite
import os
CLASSES = ['clamp']  # detection class names (single-class model)
nmsThresh = 0.4  # IoU threshold for rotated NMS
objectThresh = 0.5  # confidence threshold for keeping a detection
# ------------------- 工具函数 -------------------
def letterbox_resize(image, size, bg_color=114):
    """Aspect-preserving resize onto a padded canvas of the requested size.

    Returns (canvas, scale, offset_x, offset_y) so detections can be mapped
    back to the original image coordinates.
    """
    dst_w, dst_h = size
    src_h, src_w, _ = image.shape
    scale = min(dst_w / src_w, dst_h / src_h)
    fit_w = int(src_w * scale)
    fit_h = int(src_h * scale)
    resized = cv2.resize(image, (fit_w, fit_h), interpolation=cv2.INTER_AREA)
    # Uniform background canvas, with the resized image centred on it.
    canvas = np.full((dst_h, dst_w, 3), bg_color, dtype=np.uint8)
    pad_x = (dst_w - fit_w) // 2
    pad_y = (dst_h - fit_h) // 2
    canvas[pad_y:pad_y + fit_h, pad_x:pad_x + fit_w] = resized
    return canvas, scale, pad_x, pad_y
class DetectBox:
    """One rotated detection: class id, score, corner coords and angle (radians)."""

    def __init__(self, classId, score, xmin, ymin, xmax, ymax, angle):
        # Public attribute names mirror the constructor arguments.
        self.classId = classId
        self.score = score
        self.angle = angle
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
def rotate_rectangle(x1, y1, x2, y2, a):
    """Rotate the axis-aligned box (x1, y1)-(x2, y2) by *a* radians about its centre.

    Returns the four integer corner points in drawing order.
    """
    cx, cy = (x1 + x2) / 2, (y1 + y2) / 2
    cos_a, sin_a = math.cos(a), math.sin(a)

    def _rot(px, py):
        # Rotate (px, py) about the centre; int() truncation matches the
        # original per-corner computation exactly.
        dx, dy = px - cx, py - cy
        return (int(dx * cos_a - dy * sin_a + cx),
                int(dx * sin_a + dy * cos_a + cy))

    # Same corner order as before: (x1,y1), (x1,y2), (x2,y2), (x2,y1).
    return [_rot(x1, y1), _rot(x1, y2), _rot(x2, y2), _rot(x2, y1)]
def intersection(g, p):
    """Rotated-box IoU of two 4-point polygons (0 for invalid polygons or empty union)."""
    g = Polygon(np.array(g).reshape(-1,2))
    p = Polygon(np.array(p).reshape(-1,2))
    if not g.is_valid or not p.is_valid:
        return 0
    inter = g.intersection(p).area
    union = g.area + p.area - inter
    return 0 if union == 0 else inter / union
def NMS(detectResult):
    """Greedy rotated-box non-maximum suppression.

    Boxes are visited in descending score order; any later same-class box
    whose rotated IoU with a kept box exceeds nmsThresh is marked suppressed
    (classId = -1) and skipped. Note: input boxes are mutated in place.
    """
    predBoxs = []
    sort_detectboxs = sorted(detectResult, key=lambda x: x.score, reverse=True)
    for i in range(len(sort_detectboxs)):
        if sort_detectboxs[i].classId == -1:
            continue
        p1 = rotate_rectangle(sort_detectboxs[i].xmin, sort_detectboxs[i].ymin,
                              sort_detectboxs[i].xmax, sort_detectboxs[i].ymax,
                              sort_detectboxs[i].angle)
        predBoxs.append(sort_detectboxs[i])
        for j in range(i + 1, len(sort_detectboxs)):
            if sort_detectboxs[j].classId == sort_detectboxs[i].classId:
                p2 = rotate_rectangle(sort_detectboxs[j].xmin, sort_detectboxs[j].ymin,
                                      sort_detectboxs[j].xmax, sort_detectboxs[j].ymax,
                                      sort_detectboxs[j].angle)
                if intersection(p1, p2) > nmsThresh:
                    sort_detectboxs[j].classId = -1
    return predBoxs
def sigmoid(x):
    """Numerically stable elementwise logistic function."""
    # Two algebraically equivalent forms, selected per element so the exp()
    # argument is never a large positive number.
    positive_branch = 1 / (1 + np.exp(-x))
    negative_branch = np.exp(x) / (1 + np.exp(x))
    return np.where(x >= 0, positive_branch, negative_branch)
def softmax(x, axis=-1):
    """Softmax along *axis*, shifted by the max for numerical stability."""
    shifted = x - np.max(x, axis=axis, keepdims=True)
    weights = np.exp(shifted)
    return weights / np.sum(weights, axis=axis, keepdims=True)
def process(out, model_w, model_h, stride, angle_feature, index, scale_w=1, scale_h=1):
    """Decode one YOLO-OBB DFL head into DetectBox candidates.

    Args:
        out: head output reshaped to (1, 65, H*W) — 64 DFL box channels + 1 class score.
        model_w, model_h: feature-map width/height for this head.
        stride: pixels per feature cell (8/16/32).
        angle_feature: flattened angle map shared by all heads.
        index: offset of this head's cells inside angle_feature.
        scale_w, scale_h: optional output scaling factors.

    Returns:
        list of DetectBox above objectThresh (coords in letterboxed-image pixels).
    """
    class_num = len(CLASSES)
    angle_feature = angle_feature.reshape(-1)
    xywh = out[:, :64, :]
    conf = sigmoid(out[:, 64:, :]).reshape(-1)
    boxes = []
    for ik in range(model_h * model_w * class_num):
        if conf[ik] > objectThresh:
            # Recover cell coordinates (w, h) and class channel c from the flat index.
            w = ik % model_w
            h = (ik % (model_w * model_h)) // model_w
            c = ik // (model_w * model_h)
            # DFL decode: softmax over 16 bins, then expectation -> edge distances.
            xywh_ = xywh[0, :, (h * model_w) + w].reshape(1, 4, 16, 1)
            data = np.arange(16).reshape(1, 1, 16, 1)
            xywh_ = softmax(xywh_, 2)
            xywh_ = np.sum(xywh_ * data, axis=2).reshape(-1)
            xywh_add = xywh_[:2] + xywh_[2:]
            xywh_sub = (xywh_[2:] - xywh_[:2]) / 2
            # Clamp the angle index defensively, then convert the normalized
            # angle to radians via (v - 0.25) * pi.
            angle_idx = min(index + (h * model_w) + w, len(angle_feature) - 1)
            angle = (angle_feature[angle_idx] - 0.25) * math.pi
            cos_a, sin_a = math.cos(angle), math.sin(angle)
            # Rotate the centre offset and shift to absolute cell coordinates.
            xy = xywh_sub[0] * cos_a - xywh_sub[1] * sin_a, xywh_sub[0] * sin_a + xywh_sub[1] * cos_a
            xywh1 = np.array([xy[0] + w + 0.5, xy[1] + h + 0.5, xywh_add[0], xywh_add[1]])
            xywh1 *= stride
            # Centre/size -> corner coordinates, with optional scaling.
            xmin = (xywh1[0] - xywh1[2]/2) * scale_w
            ymin = (xywh1[1] - xywh1[3]/2) * scale_h
            xmax = (xywh1[0] + xywh1[2]/2) * scale_w
            ymax = (xywh1[1] + xywh1[3]/2) * scale_h
            boxes.append(DetectBox(c, conf[ik], xmin, ymin, xmax, ymax, angle))
    return boxes
# ------------------- 主函数 -------------------
def detect_boxes_angle_rknn(model_path, image_path, save_path=None):
    """Run the RKNN OBB model on one image and print (optionally draw) each box angle.

    Args:
        model_path: path to the compiled .rknn model.
        image_path: image to run inference on.
        save_path: optional path; when given, boxes are mapped back to the
            original image, drawn, and the result is written there.

    Returns:
        (detect_boxes, img): NMS-filtered DetectBox list (coords in the
        letterboxed 640x640 space) and the original BGR image, or
        (None, None) when the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ 无法读取图像: {image_path}")
        return None, None
    # Letterbox to the model input size, then HWC BGR -> NHWC RGB.
    img_resized, scale, offset_x, offset_y = letterbox_resize(img, (640, 640))
    infer_img = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
    infer_img = np.expand_dims(infer_img, 0)
    # NPU runtime lifecycle: load -> init -> infer -> release (at the end).
    rknn_lite = RKNNLite(verbose=False)
    rknn_lite.load_rknn(model_path)
    rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
    results = rknn_lite.inference([infer_img])
    detect_boxes = []
    # The last output is the shared angle map; the others are per-stride
    # heads, identified by their spatial size (20/40/80).
    for x in results[:-1]:
        index, stride = 0, 0
        if x.shape[2] == 20:
            stride, index = 32, 20*4*20*4 + 20*2*20*2
        elif x.shape[2] == 40:
            stride, index = 16, 20*4*20*4
        elif x.shape[2] == 80:
            stride, index = 8, 0
        feature = x.reshape(1, 65, -1)
        detect_boxes += process(feature, x.shape[3], x.shape[2], stride, results[-1], index)
    detect_boxes = NMS(detect_boxes)
    # Report every kept box's angle; optionally draw on the original image.
    for i, box in enumerate(detect_boxes):
        print(f"{i+1}: angle = {box.angle:.4f} rad ({np.degrees(box.angle):.2f}°)")
        if save_path:
            # Undo the letterbox transform to get original-image coordinates.
            xmin = int((box.xmin - offset_x)/scale)
            ymin = int((box.ymin - offset_y)/scale)
            xmax = int((box.xmax - offset_x)/scale)
            ymax = int((box.ymax - offset_y)/scale)
            points = rotate_rectangle(xmin, ymin, xmax, ymax, box.angle)
            cv2.polylines(img, [np.array(points, np.int32)], True, (0, 255, 0), 1)
            cv2.putText(img, f"{np.degrees(box.angle):.1f}°", (xmin, ymin-5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1)
    if save_path:
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        cv2.imwrite(save_path, img)
        print(f"✅ 带角度的检测结果已保存到 {save_path}")
    rknn_lite.release()
    return detect_boxes, img
# ------------------- Usage example -------------------
if __name__ == "__main__":
    model_path = "obb.rknn"
    image_path = "2.jpg"
    save_path = "./inference_results/boxes_with_angle.jpg"
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    detect_boxes_angle_rknn(model_path, image_path, save_path)

BIN
ailai_pc/best.pt Normal file

Binary file not shown.

197
ailai_pc/bushu_angle.py Normal file
View File

@ -0,0 +1,197 @@
import cv2
import numpy as np
import math
from shapely.geometry import Polygon
from rknnlite.api import RKNNLite
import os
# ------------------- Configuration -------------------
CLASSES = ['clamp']  # detection class names (single-class model)
nmsThresh = 0.4  # IoU threshold for rotated NMS
objectThresh = 0.5  # confidence threshold for keeping a detection
# ------------------- Original image size (globals) -------------------
ORIG_W = 2560  # original image width  (NOTE(review): not referenced below)
ORIG_H = 1440  # original image height (NOTE(review): not referenced below)
# ------------------- 工具函数 -------------------
def letterbox_resize(image, size, bg_color=114):
    """Aspect-preserving resize onto a padded canvas of the requested size.

    Returns (canvas, scale, offset_x, offset_y) so detections can be mapped
    back to the original image coordinates.
    """
    dst_w, dst_h = size
    src_h, src_w, _ = image.shape
    scale = min(dst_w / src_w, dst_h / src_h)
    fit_w = int(src_w * scale)
    fit_h = int(src_h * scale)
    fitted = cv2.resize(image, (fit_w, fit_h), interpolation=cv2.INTER_AREA)
    # Uniform background canvas, with the resized image centred on it.
    canvas = np.full((dst_h, dst_w, 3), bg_color, dtype=np.uint8)
    pad_x = (dst_w - fit_w) // 2
    pad_y = (dst_h - fit_h) // 2
    canvas[pad_y:pad_y + fit_h, pad_x:pad_x + fit_w] = fitted
    return canvas, scale, pad_x, pad_y
class DetectBox:
    """One rotated detection: class id, score, corner coords and angle (radians)."""

    def __init__(self, classId, score, xmin, ymin, xmax, ymax, angle):
        # Public attribute names mirror the constructor arguments.
        self.classId = classId
        self.score = score
        self.angle = angle
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
def rotate_rectangle(x1, y1, x2, y2, a):
    """Rotate the axis-aligned box (x1, y1)-(x2, y2) by *a* radians about its centre.

    Returns the four integer corner points in drawing order.
    """
    cx, cy = (x1 + x2) / 2, (y1 + y2) / 2
    cos_a, sin_a = math.cos(a), math.sin(a)

    def _rot(px, py):
        # Rotate (px, py) about the centre; int() truncation matches the
        # original per-corner computation exactly.
        dx, dy = px - cx, py - cy
        return (int(dx * cos_a - dy * sin_a + cx),
                int(dx * sin_a + dy * cos_a + cy))

    # Same corner order as before: (x1,y1), (x1,y2), (x2,y2), (x2,y1).
    return [_rot(x1, y1), _rot(x1, y2), _rot(x2, y2), _rot(x2, y1)]
def intersection(g, p):
    """Rotated-box IoU of two 4-point polygons (0 for invalid polygons or empty union)."""
    g = Polygon(np.array(g).reshape(-1,2))
    p = Polygon(np.array(p).reshape(-1,2))
    if not g.is_valid or not p.is_valid:
        return 0
    inter = g.intersection(p).area
    union = g.area + p.area - inter
    return 0 if union == 0 else inter / union
def NMS(detectResult):
    """Greedy rotated-box non-maximum suppression.

    Boxes are visited in descending score order; any later same-class box
    whose rotated IoU with a kept box exceeds nmsThresh is marked suppressed
    (classId = -1) and skipped. Note: input boxes are mutated in place.
    """
    predBoxs = []
    sort_detectboxs = sorted(detectResult, key=lambda x: x.score, reverse=True)
    for i in range(len(sort_detectboxs)):
        if sort_detectboxs[i].classId == -1:
            continue
        p1 = rotate_rectangle(sort_detectboxs[i].xmin, sort_detectboxs[i].ymin,
                              sort_detectboxs[i].xmax, sort_detectboxs[i].ymax,
                              sort_detectboxs[i].angle)
        predBoxs.append(sort_detectboxs[i])
        for j in range(i + 1, len(sort_detectboxs)):
            if sort_detectboxs[j].classId == sort_detectboxs[i].classId:
                p2 = rotate_rectangle(sort_detectboxs[j].xmin, sort_detectboxs[j].ymin,
                                      sort_detectboxs[j].xmax, sort_detectboxs[j].ymax,
                                      sort_detectboxs[j].angle)
                if intersection(p1, p2) > nmsThresh:
                    sort_detectboxs[j].classId = -1
    return predBoxs
def sigmoid(x):
    """Numerically stable elementwise logistic function."""
    # Two algebraically equivalent forms, selected per element so the exp()
    # argument is never a large positive number.
    positive_branch = 1 / (1 + np.exp(-x))
    negative_branch = np.exp(x) / (1 + np.exp(x))
    return np.where(x >= 0, positive_branch, negative_branch)
def softmax(x, axis=-1):
    """Softmax along *axis*, shifted by the max for numerical stability."""
    shifted = x - np.max(x, axis=axis, keepdims=True)
    weights = np.exp(shifted)
    return weights / np.sum(weights, axis=axis, keepdims=True)
# ------------------- Key change: process() maps boxes back to the original image -------------------
def process(out, model_w, model_h, stride, angle_feature, index, scale=1.0, offset_x=0, offset_y=0):
    """Decode one YOLO-OBB DFL head into DetectBox candidates in ORIGINAL image coords.

    Args:
        out: head output reshaped to (1, 65, H*W) — 64 DFL box channels + 1 class score.
        model_w, model_h: feature-map width/height for this head.
        stride: pixels per feature cell (8/16/32).
        angle_feature: flattened angle map shared by all heads.
        index: offset of this head's cells inside angle_feature.
        scale, offset_x, offset_y: letterbox parameters, undone here so the
            returned coordinates refer to the original image.

    Returns:
        list of DetectBox above objectThresh.
    """
    class_num = len(CLASSES)
    angle_feature = angle_feature.reshape(-1)
    xywh = out[:, :64, :]
    conf = sigmoid(out[:, 64:, :]).reshape(-1)
    boxes = []
    for ik in range(model_h * model_w * class_num):
        if conf[ik] > objectThresh:
            # Recover cell coordinates (w, h) and class channel c from the flat index.
            w = ik % model_w
            h = (ik % (model_w * model_h)) // model_w
            c = ik // (model_w * model_h)
            # DFL decode: softmax over 16 bins, then expectation -> edge distances.
            xywh_ = xywh[0, :, (h * model_w) + w].reshape(1, 4, 16, 1)
            xywh_ = softmax(xywh_, 2)
            data = np.arange(16).reshape(1, 1, 16, 1)
            xywh_ = np.sum(xywh_ * data, axis=2).reshape(-1)
            xywh_add = xywh_[:2] + xywh_[2:]
            xywh_sub = (xywh_[2:] - xywh_[:2]) / 2
            # Clamp the angle index defensively, then convert the normalized
            # angle to radians via (v - 0.25) * pi.
            angle_idx = min(index + (h * model_w) + w, len(angle_feature) - 1)
            angle = (angle_feature[angle_idx] - 0.25) * math.pi
            cos_a, sin_a = math.cos(angle), math.sin(angle)
            # Rotate the centre offset and shift to absolute cell coordinates.
            xy = xywh_sub[0] * cos_a - xywh_sub[1] * sin_a, xywh_sub[0] * sin_a + xywh_sub[1] * cos_a
            xywh1 = np.array([xy[0] + w + 0.5, xy[1] + h + 0.5, xywh_add[0], xywh_add[1]])
            xywh1 *= stride
            # Map from letterboxed coordinates back to the original image.
            xmin = (xywh1[0] - xywh1[2]/2 - offset_x) / scale
            ymin = (xywh1[1] - xywh1[3]/2 - offset_y) / scale
            xmax = (xywh1[0] + xywh1[2]/2 - offset_x) / scale
            ymax = (xywh1[1] + xywh1[3]/2 - offset_y) / scale
            boxes.append(DetectBox(c, conf[ik], xmin, ymin, xmax, ymax, angle))
    return boxes
# ------------------- Reusable entry point -------------------
def detect_boxes_rknn(model_path, image_path):
    """Run the RKNN OBB model on one image.

    Returns:
        (detect_boxes, img): NMS-filtered DetectBox list, already mapped back
        to original-image coordinates, and the original BGR image;
        (None, None) when the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ 无法读取图像: {image_path}")
        return None, None
    # Letterbox to the model input size, then HWC BGR -> NHWC RGB.
    img_resized, scale, offset_x, offset_y = letterbox_resize(img, (640, 640))
    infer_img = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
    infer_img = np.expand_dims(infer_img, 0)
    # NPU runtime lifecycle: load -> init -> infer -> release.
    rknn_lite = RKNNLite(verbose=False)
    rknn_lite.load_rknn(model_path)
    rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
    results = rknn_lite.inference([infer_img])
    detect_boxes = []
    # The last output is the shared angle map; the others are per-stride
    # heads, identified by their spatial size (20/40/80).
    for x in results[:-1]:
        index, stride = 0, 0
        if x.shape[2] == 20:
            stride, index = 32, 20*4*20*4 + 20*2*20*2
        elif x.shape[2] == 40:
            stride, index = 16, 20*4*20*4
        elif x.shape[2] == 80:
            stride, index = 8, 0
        feature = x.reshape(1, 65, -1)
        detect_boxes += process(feature, x.shape[3], x.shape[2], stride, results[-1], index,
                                scale=scale, offset_x=offset_x, offset_y=offset_y)
    detect_boxes = NMS(detect_boxes)
    rknn_lite.release()
    return detect_boxes, img
# ------------------- 绘制与辅助函数 -------------------
def get_angles(detect_boxes):
    """Collect the rotation angle (radians) of every detection, in order."""
    angles = []
    for box in detect_boxes:
        angles.append(box.angle)
    return angles
def draw_boxes(img, detect_boxes, save_path=None):
    """Draw every rotated box with its angle (degrees) on *img*; optionally save.

    Returns the annotated image (the input is modified in place).
    """
    for box in detect_boxes:
        points = rotate_rectangle(box.xmin, box.ymin, box.xmax, box.ymax, box.angle)
        cv2.polylines(img, [np.array(points, np.int32)], True, (0, 255, 0), 1)
        cv2.putText(img, f"{np.degrees(box.angle):.1f}°", (int(box.xmin), int(box.ymin)-5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1)
    if save_path:
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        cv2.imwrite(save_path, img)
        print(f"✅ 带角度的检测结果已保存到 {save_path}")
    return img
def visualize_top_box(img, detect_boxes, save_path=None):
    """Draw only the highest-score box (with its angle) on *img*; optionally save.

    Returns the annotated image (unchanged when detect_boxes is empty).
    """
    if not detect_boxes:
        return img
    top_box = max(detect_boxes, key=lambda x: x.score)
    points = rotate_rectangle(top_box.xmin, top_box.ymin, top_box.xmax, top_box.ymax, top_box.angle)
    cv2.polylines(img, [np.array(points, np.int32)], True, (0, 255, 0), 2)
    cv2.putText(img, f"{np.degrees(top_box.angle):.1f}°", (int(top_box.xmin), int(top_box.ymin)-5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,0,255), 2)
    if save_path:
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        cv2.imwrite(save_path, img)
    return img
# ------------------- Usage example -------------------
if __name__ == "__main__":
    model_path = "obb.rknn"
    image_path = "2.jpg"
    detect_boxes, img = detect_boxes_rknn(model_path, image_path)
    # Print each detection's angle in radians and degrees.
    angles = get_angles(detect_boxes)
    for i, angle in enumerate(angles):
        print(f"{i+1}: angle = {angle:.4f} rad ({np.degrees(angle):.2f}°)")
    # Save one image with all boxes, and one with only the top-scoring box.
    save_path_all = "./inference_results/boxes_all.jpg"
    draw_boxes(img.copy(), detect_boxes, save_path_all)
    save_path_top = "./inference_results/top_box.jpg"
    visualize_top_box(img.copy(), detect_boxes, save_path_top)

145
ailai_pc/caculate_center.py Normal file
View File

@ -0,0 +1,145 @@
from ultralytics import YOLO
import cv2
import os
import numpy as np
# Class names — order must match training, i.e. the model's class_id mapping.
CLASS_NAMES = ['ban', 'bag']
COLORS = [(0, 255, 0), (255, 0, 0)]  # BGR: ban = green, bag = blue
def get_best_angles_per_class(image_path, weight_path, return_degree=False):
    """Return, per class, the rotation angle of the highest-confidence OBB detection.

    Args:
        image_path: path of the image to run inference on.
        weight_path: path to the YOLO OBB weights.
        return_degree: if True return angles in degrees, otherwise radians.

    Returns:
        dict mapping class name -> best angle (float), or None when the class
        was not detected (or the image could not be read).
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ 无法读取图像:{image_path}")
        return {cls: None for cls in CLASS_NAMES}
    model = YOLO(weight_path)
    results = model(img, save=False, imgsz=640, conf=0.15, task='obb')
    result = results[0]
    boxes = result.obb
    if boxes is None or len(boxes) == 0:
        print("⚠️ 未检测到任何目标。")
        return {cls: None for cls in CLASS_NAMES}
    # Pull detections to the CPU as plain numpy arrays.
    xywhr = boxes.xywhr.cpu().numpy()  # (N, 5) -> cx, cy, w, h, r (radians)
    confs = boxes.conf.cpu().numpy()  # (N,)
    class_ids = boxes.cls.cpu().numpy().astype(int)  # (N,)
    # Result dict defaults every class to "not found".
    best_angles = {cls: None for cls in CLASS_NAMES}
    # For each class keep only the highest-confidence detection.
    for class_id, class_name in enumerate(CLASS_NAMES):
        mask = (class_ids == class_id)
        if not np.any(mask):
            print(f"🟡 未检测到类别: {class_name}")
            continue
        # argmax over the masked subset, then map back to the global index.
        idx_in_class = np.argmax(confs[mask])
        global_idx = np.where(mask)[0][idx_in_class]
        angle_rad = xywhr[global_idx][4]
        best_angles[class_name] = np.degrees(angle_rad) if return_degree else angle_rad
    return best_angles
def save_obb_visual(image_path, weight_path, save_path):
    """Run OBB detection and save an annotated copy of the image.

    Draws every rotated box with its class name, confidence and rotation
    angle (degrees). When nothing is detected the original image is saved
    unchanged so the output path always exists.

    Args:
        image_path: path of the image to annotate.
        weight_path: path to the YOLO OBB weights.
        save_path: where to write the annotated image (directories created).
    """
    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ 无法读取图像:{image_path}")
        return
    model = YOLO(weight_path)
    results = model(img, save=False, imgsz=640, conf=0.15, task='obb')
    result = results[0]
    boxes = result.obb
    if boxes is None or len(boxes) == 0:
        print("⚠️ 未检测到任何目标。")
        # Still save the unmodified image.
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        cv2.imwrite(save_path, img)
        return
    # Detections as numpy arrays.
    xywhr = boxes.xywhr.cpu().numpy()
    confs = boxes.conf.cpu().numpy()
    class_ids = boxes.cls.cpu().numpy().astype(int)
    # Draw on a copy so the original stays untouched.
    annotated_img = img.copy()
    for i in range(len(boxes)):
        cx, cy, w, h, r = xywhr[i]
        angle_deg = np.degrees(r)
        class_id = class_ids[i]
        class_name = CLASS_NAMES[class_id] if class_id < len(CLASS_NAMES) else f"cls{class_id}"
        conf = confs[i]
        color = COLORS[class_id % len(COLORS)] if class_id < len(CLASS_NAMES) else (128, 128, 128)
        # Rotated rect -> 4 corner points (cv2.boxPoints expects degrees).
        rect = ((cx, cy), (w, h), angle_deg)
        box_pts = cv2.boxPoints(rect).astype(int)
        cv2.polylines(annotated_img, [box_pts], isClosed=True, color=color, thickness=2)
        # Label text: class + confidence + angle.
        text = f"{class_name} {conf:.2f} {angle_deg:.1f}°"
        font_scale = 0.7
        thickness = 2
        text_size, _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
        # Filled background rectangle behind the label.
        cv2.rectangle(annotated_img,
                      (box_pts[0][0], box_pts[0][1] - text_size[1] - 8),
                      (box_pts[0][0] + text_size[0], box_pts[0][1] + 2),
                      color, -1)
        # White label text on top of the background.
        cv2.putText(annotated_img, text,
                    (box_pts[0][0], box_pts[0][1] - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), thickness)
    # Save the annotated image.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    cv2.imwrite(save_path, annotated_img)
    print(f"✅ 检测结果已保存至: {save_path}")
# ===============================
# Example usage
# ===============================
if __name__ == "__main__":
    weight = r"/home/hx/yolo/ultralytics_yolo11-main/runs/train/exp_obb_ailai/weights/best.pt"
    image = r"/home/hx/yolo/ailai_obb/camera01/1.jpg"
    save_path = "./inference_results/visualized_2.jpg"
    # Best angle per class, reported in degrees.
    angles_deg = get_best_angles_per_class(image, weight, return_degree=True)
    print("\n🎯 各类别最佳旋转角度(度):")
    for cls_name, angle in angles_deg.items():
        if angle is not None:
            print(f"  {cls_name}: {angle:.2f}°")
        else:
            print(f"  {cls_name}: 未检测到")
    # Visualize every detection and save the annotated image.
    save_obb_visual(image, weight, save_path)

View File

@ -0,0 +1,51 @@
import cv2
# Global mouse-drag state shared with the callback below.
refPt = []  # the two endpoints of the drawn segment
drawing = False  # True while the left button is held down
def draw_line(event, x, y, flags, param):
    """Mouse callback: drag with the left button to draw a line and print its pixel length."""
    global refPt, drawing, image
    # Button down: remember the start point and enter drawing mode.
    if event == cv2.EVENT_LBUTTONDOWN:
        refPt = [(x, y)]
        drawing = True
    # While dragging: show a live preview on a copy of the image.
    elif event == cv2.EVENT_MOUSEMOVE and drawing:
        temp_image = image.copy()
        cv2.line(temp_image, refPt[0], (x, y), (0, 255, 0), 2)
        cv2.imshow("Image", temp_image)
    # Button up: commit the end point, leave drawing mode, report the length.
    elif event == cv2.EVENT_LBUTTONUP:
        refPt.append((x, y))
        drawing = False
        # Draw the final line on the shared image.
        cv2.line(image, refPt[0], refPt[1], (0, 255, 0), 2)
        cv2.imshow("Image", image)
        # Euclidean length of the drawn segment, in pixels.
        dx = refPt[1][0] - refPt[0][0]
        dy = refPt[1][1] - refPt[0][1]
        length = (dx ** 2 + dy ** 2) ** 0.5
        print(f"线段长度: {length:.2f} 像素")
# Load the target image (edit image_path before running).
image_path = 'your_image_path_here.jpg'
image = cv2.imread(image_path)
cv2.namedWindow("Image")
cv2.setMouseCallback("Image", draw_line)
# Event loop: keep showing the image until 'q' is pressed.
while True:
    cv2.imshow("Image", image)
    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):
        break
cv2.destroyAllWindows()

40
ailai_pc/caculatet.py Normal file
View File

@ -0,0 +1,40 @@
import cv2
import numpy as np
# Global mouse state shared with the callback below.
points = []  # the two clicked endpoints of the measured segment
drawing = False  # True while the left button is held down
def select_point(event, x, y, flags, param):
    """Mouse callback: press/release the left button to measure a segment in pixels."""
    global drawing, points
    if event == cv2.EVENT_LBUTTONDOWN:
        # Button down: remember the first endpoint.
        drawing = True
        points = [(x, y)]
    elif event == cv2.EVENT_LBUTTONUP:
        # Button up: remember the second endpoint and finish the segment.
        drawing = False
        points.append((x, y))
        # Draw the measured segment on the image.
        cv2.line(img, points[0], points[1], (0, 255, 0), 2)
        # Euclidean distance between the two endpoints.
        distance = np.sqrt((points[1][0] - points[0][0]) ** 2 + (points[1][1] - points[0][1]) ** 2)
        print(f"线段的长度为: {distance:.2f} 像素")
        # Refresh the window with the updated image.
        cv2.imshow('image', img)
# Load the image to measure on (edit img_path before running).
img_path = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/point1/val/192.168.0.234_01_20251014154410130.jpg'
img = cv2.imread(img_path)
cv2.namedWindow('image')
cv2.setMouseCallback('image', select_point)
# Event loop: keep showing the image until ESC is pressed.
while(1):
    cv2.imshow('image', img)
    k = cv2.waitKey(1) & 0xFF
    if k == 27:  # ESC exits
        break
cv2.destroyAllWindows()

BIN
ailai_pc/camera01/1.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

BIN
ailai_pc/camera01/2.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

BIN
ailai_pc/camera01/3.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

BIN
ailai_pc/camera01/4.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

41
ailai_pc/cover_image.py Normal file
View File

@ -0,0 +1,41 @@
import os
import shutil
def overwrite_images(folder1, folder2):
    """Overwrite same-named images in *folder2* with the copies from *folder1*.

    Only names present in both folders are considered, and only files with a
    common image extension are copied (metadata preserved via copy2).
    """
    # Both folders must exist before comparing their contents.
    if not os.path.exists(folder1) or not os.path.exists(folder2):
        print("❌ 输入的文件夹路径不存在")
        return
    # Names present in both directories.
    shared_names = set(os.listdir(folder1)) & set(os.listdir(folder2))
    if not shared_names:
        print("⚠️ 没有找到同名文件")
        return
    image_exts = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")
    for name in shared_names:
        source_path = os.path.join(folder1, name)
        target_path = os.path.join(folder2, name)
        # Copy image files only (extend the tuple above if needed).
        if source_path.lower().endswith(image_exts):
            shutil.copy2(source_path, target_path)
            print(f"✅ 已覆盖: {target_path}")
    print("🎯 覆盖完成!")
if __name__ == "__main__":
    folder1 = r"/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/classdata1/save/class4"  # source folder
    folder2 = r"/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/seg/resize_seg2/train"  # destination folder
    overwrite_images(folder1, folder2)

174
ailai_pc/diff.py Normal file
View File

@ -0,0 +1,174 @@
import cv2
import numpy as np
from ultralytics import YOLO
# ====================== User configuration ======================
MODEL_PATH = 'best.pt'  # YOLO keypoint model weights
IMAGE_PATH = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/point2/train/1.jpg'  # 👈 change to your image path
OUTPUT_DIR = './output_images'  # where annotated result images are written
# Fixed reference point (e.g. the theoretical position from calibration),
# given as (x, y) in pixels.
FIXED_REF_POINT = (535.0, 605)
def calculate_scale(width_mm, width_px):
    """Return the horizontal scale factor in mm/px.

    :param width_mm: real-world width in millimetres
    :param width_px: the same width measured in pixels
    :return: millimetres per pixel, or ``None`` when ``width_px`` is 0
    """
    if width_px != 0:
        return width_mm / float(width_px)
    # A zero pixel width cannot be turned into a scale factor.
    print("像素宽度不能为0")
    return None
# Example calibration: a reference object of known physical width is
# measured in the image to derive the horizontal mm-per-pixel scale.
width_mm = 70.0  # real width in millimetres
width_px = 42  # width measured in the image, in pixels
SCALE_X= calculate_scale(width_mm, width_px)
# NOTE(review): if width_px were 0, SCALE_X would be None and the format
# expression below would raise TypeError.
print(f"水平方向的缩放因子为: {SCALE_X:.3f} mm/px")
def calculate_scale_y(height_mm, height_px):
    """Return the vertical scale factor in mm/px.

    :param height_mm: real-world height in millimetres
    :param height_px: the same height measured in pixels
    :return: millimetres per pixel, or ``None`` when ``height_px`` is 0
    """
    if height_px != 0:
        return height_mm / float(height_px)
    # A zero pixel height cannot be turned into a scale factor.
    print("像素高度不能为0")
    return None
# The same calibration, applied to the vertical direction.
height_mm = 890.0  # real height in millimetres
height_px = 507  # height measured in the image, in pixels
SCALE_Y = calculate_scale_y(height_mm, height_px)
# NOTE(review): SCALE_Y is None when height_px is 0, which would make the
# format expression below raise TypeError.
print(f"垂直方向的缩放因子为: {SCALE_Y:.3f} mm/px")
# Create the output directory for annotated images.
import os
os.makedirs(OUTPUT_DIR, exist_ok=True)
# ====================== 可视化函数(增强版)======================
def draw_keypoints_and_offset(image, kpts_xy, kpts_conf, orig_shape, fixed_point, scale_x, scale_y):
    """
    Draw the first two keypoints of each instance, their midpoint, the fixed
    reference point, and an arrow with the mm offset between midpoint and
    reference.
    :param image: OpenCV (BGR) image, annotated in place
    :param kpts_xy: (N, K, 2) keypoint coordinates in pixels
    :param kpts_conf: (N, K) keypoint confidences
    :param orig_shape: original image size (H, W)
    :param fixed_point: fixed reference point (fx, fy), in pixels
    :param scale_x: horizontal scale in mm/px
    :param scale_y: vertical scale in mm/px
    :return: (annotated image, list of per-instance offset dicts)
    """
    colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0)]  # 1 red, 2 blue, 3 green, 4 cyan (BGR)
    results_info = []
    for i in range(len(kpts_xy)):
        xy = kpts_xy[i]  # (K, 2)
        conf = kpts_conf[i] if kpts_conf.ndim == 2 else kpts_conf[i:i+1]
        # Need at least two keypoints to compute a midpoint.
        if len(xy) < 2:
            print(f"⚠️ 实例 {i} 的关键点数量不足2个")
            continue
        p1 = xy[0]  # first keypoint
        p2 = xy[1]  # second keypoint
        c1 = conf[0] if hasattr(conf, '__len__') else conf
        c2 = conf[1] if hasattr(conf, '__len__') else conf
        # Skip instances whose first two keypoints are low-confidence.
        if c1 < 0.5 or c2 < 0.5:
            print(f"⚠️ 实例 {i} 的前两个关键点置信度过低: c1={c1:.3f}, c2={c2:.3f}")
            continue
        # Integer coordinates are only needed for drawing.
        p1_int = tuple(map(int, p1))
        p2_int = tuple(map(int, p2))
        h, w = orig_shape
        # Both keypoints must lie inside the image.
        valid = all(0 <= x < w and 0 <= y < h for x, y in [p1, p2])
        if not valid:
            print(f"⚠️ 实例 {i} 的关键点超出图像边界")
            continue
        # Draw the two keypoints as filled circles.
        cv2.circle(image, p1_int, radius=15, color=colors[0], thickness=-1)  # red
        cv2.circle(image, p2_int, radius=15, color=colors[1], thickness=-1)  # blue
        # Label them "1" and "2", offset to avoid covering the circles.
        cv2.putText(image, "1", (p1_int[0] + 20, p1_int[1] - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.5, colors[0], 5)
        cv2.putText(image, "2", (p2_int[0] + 20, p2_int[1] - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.5, colors[1], 5)
        # Midpoint of the two keypoints ("dynamic center").
        center_x = (p1[0] + p2[0]) / 2.0
        center_y = (p1[1] + p2[1]) / 2.0
        dynamic_center = (int(center_x), int(center_y))
        cv2.circle(image, dynamic_center, radius=18, color=(0, 255, 0), thickness=3)
        cv2.putText(image, "Center", (dynamic_center[0] + 30, dynamic_center[1]),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 3)
        # Fixed calibration reference point.
        fx, fy = map(int, fixed_point)
        cv2.circle(image, (fx, fy), radius=20, color=(255, 255, 0), thickness=3)
        cv2.putText(image, "Ref", (fx + 30, fy), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (255, 255, 0), 3)
        # Pixel offset of the dynamic center relative to the reference point,
        # converted to millimetres with the calibrated scales.
        dx_px = center_x - fixed_point[0]
        dy_px = center_y - fixed_point[1]
        dx_mm = dx_px * scale_x
        dy_mm = dy_px * scale_y
        cv2.arrowedLine(image, (fx, fy), dynamic_center, (0, 255, 255), 3, tipLength=0.05)
        # NOTE(review): cv2.putText renders ASCII glyphs only; the 'Δ' below
        # will likely show as '?' on the output image — confirm visually.
        cv2.putText(image, f"ΔX={dx_mm:+.1f}mm", (fx + 40, fy - 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 3)
        cv2.putText(image, f"ΔY={dy_mm:+.1f}mm", (fx + 40, fy + 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 3)
        results_info.append({
            'instance': i,
            'center': (center_x, center_y),
            'dx_px': dx_px, 'dy_px': dy_px,
            'dx_mm': dx_mm, 'dy_mm': dy_mm
        })
    return image, results_info
if __name__ == "__main__":
    # Load the input image; abort early when the path is wrong.
    img = cv2.imread(IMAGE_PATH)
    if img is None:
        print(f"❌ 无法读取图像,检查路径: {IMAGE_PATH}")
        exit(1)
    # Run keypoint inference with the configured model.
    model = YOLO(MODEL_PATH)
    results = model(img)
    for i, result in enumerate(results):
        if result.keypoints is not None:
            kpts = result.keypoints
            orig_shape = kpts.orig_shape
            kpts_xy = kpts.xy.cpu().numpy()
            # Fall back to all-ones confidences when the model reports none.
            kpts_conf = kpts.conf.cpu().numpy() if kpts.conf is not None else np.ones(kpts_xy.shape[:2])
            img_with_kpts = img.copy()
            img_with_kpts, offset_results = draw_keypoints_and_offset(
                img_with_kpts, kpts_xy, kpts_conf, orig_shape,
                fixed_point=FIXED_REF_POINT,
                scale_x=SCALE_X, scale_y=SCALE_Y
            )
            # Report the mm offsets per detected instance.
            for info in offset_results:
                print(f" 📌 实例 {info['instance']}: "
                      f"ΔX={info['dx_mm']:+.2f}mm, ΔY={info['dy_mm']:+.2f}mm")
            # Save the annotated image next to the configured output dir.
            save_filename = f"offset_{os.path.basename(IMAGE_PATH)}"
            save_path = os.path.join(OUTPUT_DIR, save_filename)
            cv2.imwrite(save_path, img_with_kpts)
            print(f" 💾 结果已保存: {save_path}")

Binary file not shown.

After

Width:  |  Height:  |  Size: 246 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 255 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 246 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 244 KiB

View File

@ -0,0 +1 @@
617.1340942382812 225.45236206054688

View File

@ -0,0 +1 @@
617.1340942382812 225.45236206054688

Binary file not shown.

After

Width:  |  Height:  |  Size: 283 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 728 KiB

BIN
ailai_pc/point.pt Normal file

Binary file not shown.

129
ailai_pc/point_test.py Normal file
View File

@ -0,0 +1,129 @@
import cv2
import numpy as np
from ultralytics import YOLO
import os
# ====================== User configuration ======================
MODEL_PATH = 'best.pt'  # YOLO keypoint model weights
IMAGE_SOURCE_DIR = './train'  # 👈 change to your image folder path
OUTPUT_DIR = './output_images'  # folder where annotated results are saved
# Image extensions accepted for inference.
IMG_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.tif', '.webp'}
os.makedirs(OUTPUT_DIR, exist_ok=True)
# ====================== 可视化函数 ======================
def draw_keypoints_on_image(image, kpts_xy, kpts_conf, orig_shape):
    """
    Draw every confident keypoint of every instance onto *image* (in place).

    :param image: OpenCV (BGR) image to annotate
    :param kpts_xy: (N, K, 2) keypoint coordinates in pixels
    :param kpts_conf: (N, K) keypoint confidences, or a scalar per instance
    :param orig_shape: original image size (H, W), used for bounds checks
    :return: the annotated image
    """
    # FIX: cycle the palette when a model predicts more than four keypoints;
    # the previous colors[j] lookup raised IndexError for j >= 4.
    colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0)]  # red, blue, green, cyan (BGR)
    for i in range(len(kpts_xy)):
        xy = kpts_xy[i]  # (K, 2)
        conf = kpts_conf[i] if kpts_conf.ndim == 2 else kpts_conf[i:i+1]  # (K,) or scalar
        for j in range(len(xy)):
            x, y = xy[j]
            c = conf[j] if hasattr(conf, '__len__') else conf
            x, y = int(x), int(y)
            # Skip points that fall outside the image.
            if x < 0 or y < 0 or x >= orig_shape[1] or y >= orig_shape[0]:
                continue
            # Only draw points with confidence > 0.5.
            if c < 0.5:
                continue
            color = colors[j % len(colors)]
            # Filled circle plus an offset index label to avoid occlusion.
            cv2.circle(image, (x, y), radius=15, color=color, thickness=-1)
            cv2.putText(image, f'{j+1}', (x + 20, y - 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 5)
    return image
# ====================== 主程序 ======================
if __name__ == "__main__":
    print("🚀 开始批量关键点检测任务")
    # Load the model once for the whole batch.
    print("🔄 加载 YOLO 模型...")
    model = YOLO(MODEL_PATH)
    print(f"✅ 模型加载完成: {MODEL_PATH}")
    # Collect every supported image from the source directory.
    image_files = [
        f for f in os.listdir(IMAGE_SOURCE_DIR)
        if os.path.splitext(f.lower())[1] in IMG_EXTENSIONS
    ]
    if not image_files:
        print(f"❌ 错误:在 {IMAGE_SOURCE_DIR} 中未找到支持的图像文件")
        exit(1)
    print(f"📁 发现 {len(image_files)} 张图像待处理")
    # Process each image independently.
    for img_filename in image_files:
        img_path = os.path.join(IMAGE_SOURCE_DIR, img_filename)
        print(f"\n🖼️ 正在处理: {img_filename}")
        # Read the image; skip unreadable files instead of aborting the batch.
        img = cv2.imread(img_path)
        if img is None:
            print(f"❌ 无法读取图像,跳过: {img_path}")
            continue
        print(f" ✅ 图像加载成功 (shape: {img.shape})")
        # Inference.
        print(" 🔍 正在推理...")
        results = model(img)
        processed = False  # whether any keypoints were drawn for this image
        for i, result in enumerate(results):
            if result.keypoints is not None:
                kpts = result.keypoints
                orig_shape = kpts.orig_shape  # (H, W)
                # Coordinates and confidences; default confidences to 1.0.
                kpts_xy = kpts.xy.cpu().numpy()  # (N, K, 2)
                kpts_conf = kpts.conf.cpu().numpy() if kpts.conf is not None else np.ones(kpts_xy.shape[:2])
                print(f" ✅ 检测到 {len(kpts_xy)} 个实例")
                # Draw the keypoints on a copy so the original stays clean.
                img_with_kpts = draw_keypoints_on_image(img.copy(), kpts_xy, kpts_conf, orig_shape)
                # Save the annotated image.
                save_filename = f"keypoints_{img_filename}"
                save_path = os.path.join(OUTPUT_DIR, save_filename)
                cv2.imwrite(save_path, img_with_kpts)
                print(f" 💾 结果已保存: {save_path}")
                # Optional preview (one image at a time, any key continues):
                # display_img = cv2.resize(img_with_kpts, (1280, 720))
                # cv2.imshow("Keypoints Detection", display_img)
                # print(" ⌨️ 按任意键继续...")
                # cv2.waitKey(0)
                # cv2.destroyAllWindows()
                processed = True
        if not processed:
            print(f" ❌ 未检测到关键点,跳过保存")
    print("\n" + "=" * 60)
    print("🎉 批量推理完成!")
    print(f"📊 总共处理 {len(image_files)} 张图像")
    print(f"📁 结果保存在: {OUTPUT_DIR}")
    print("=" * 60)

40
ailai_pc/rename_file.py Normal file
View File

@ -0,0 +1,40 @@
import os
import shutil
# ================= User configuration =================
FOLDER_PATH = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/point2'  # folder holding the images and their txt labels
IMG_EXT = '.jpg'
TXT_EXT = '.txt'
START_NUM = 1  # numbering starts at 1
# ================= Collect files =================
files = os.listdir(FOLDER_PATH)
# Split into image files and txt label files.
images = sorted([f for f in files if f.lower().endswith(IMG_EXT)])
txts = sorted([f for f in files if f.lower().endswith(TXT_EXT)])
# Warn when counts differ; the zip() below silently truncates to the
# shorter list, so unmatched files are left untouched.
if len(images) != len(txts):
    print(f"⚠️ 图片数量 ({len(images)}) 与 txt 文件数量 ({len(txts)}) 不一致!")
# ================= Rename =================
# NOTE(review): pairing relies on both lists sorting into the same order.
# The sort is lexicographic ('10.jpg' comes before '2.jpg'), so image/label
# correspondence should be confirmed. shutil.move also overwrites an
# existing target, so a pre-existing '1.jpg' in the folder can be clobbered
# before it is processed — verify the folder contains no numeric names.
for idx, (img_file, txt_file) in enumerate(zip(images, txts), start=START_NUM):
    img_new = f"{idx}{IMG_EXT}"
    txt_new = f"{idx}{TXT_EXT}"
    # Original full paths.
    img_path = os.path.join(FOLDER_PATH, img_file)
    txt_path = os.path.join(FOLDER_PATH, txt_file)
    # New paths.
    img_new_path = os.path.join(FOLDER_PATH, img_new)
    txt_new_path = os.path.join(FOLDER_PATH, txt_new)
    # Rename (overwrites if the target already exists).
    shutil.move(img_path, img_new_path)
    shutil.move(txt_path, txt_new_path)
    print(f"{img_file} -> {img_new}, {txt_file} -> {txt_new}")
print("🎉 全部文件重命名完成!")

56
ailai_pc/rename_image.py Normal file
View File

@ -0,0 +1,56 @@
import os
from pathlib import Path
def rename_images_in_folder(folder_path):
    """Sequentially rename all images in *folder_path* to 1.jpg, 2.jpg, ...

    Files are processed in sorted name order. A target name that already
    exists is skipped with a warning rather than overwritten.
    """
    # Recognised image suffixes (matched case-insensitively).
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff', '.webp'}
    folder = Path(folder_path)

    # Validate the path before touching anything.
    if not folder.exists():
        print(f"❌ 文件夹不存在: {folder_path}")
        return
    if not folder.is_dir():
        print(f"❌ 路径不是文件夹: {folder_path}")
        return

    # Collect the image files in a deterministic (sorted) order.
    image_files = sorted(
        entry for entry in folder.iterdir()
        if entry.is_file() and entry.suffix.lower() in image_extensions
    )
    if not image_files:
        print("🔍 文件夹中没有找到图片文件。")
        return

    print(f"📁 正在处理文件夹: {folder}")
    print(f"🖼️ 找到 {len(image_files)} 个图片文件")

    renamed_count = 0
    for idx, file_path in enumerate(image_files, start=1):
        new_name = f"{idx}.jpg"  # every output name uses the .jpg suffix
        new_path = folder / new_name
        if new_path.exists():
            # Never overwrite an existing target — skip it instead.
            print(f"⚠️ {new_name} 已存在,跳过或改名?")
            continue
        file_path.rename(new_path)
        print(f"✅ {file_path.name} → {new_name}")
        renamed_count += 1

    print(f"\n✅ 完成!共重命名 {renamed_count} 个文件。")
# ===========================
# 🔧 使用这里:设置你的文件夹路径
# ===========================
if __name__ == "__main__":
    # Entry point: set the folder whose images should be renamed.
    folder = r"/home/hx/下载/2025-09-24"  # <-- change to your image folder path
    rename_images_in_folder(folder)

Binary file not shown.

After

Width:  |  Height:  |  Size: 407 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 403 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 368 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 391 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 377 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 403 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 380 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 284 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 324 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 309 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 393 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 363 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 345 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 320 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 360 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 387 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 386 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 381 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 381 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 404 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 392 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 413 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 359 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 362 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 361 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 372 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 352 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 376 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 344 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 363 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 358 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 333 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 396 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 382 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 382 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 366 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 376 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 389 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 356 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 396 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 441 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 409 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 346 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 392 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 404 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 369 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 421 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 358 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 363 KiB

116
ailai_pc/trans_cvattoobb.py Normal file
View File

@ -0,0 +1,116 @@
# pascal_robndbox_to_yolo_obb.py
import xml.etree.ElementTree as ET
import numpy as np
from pathlib import Path
import argparse
def robndbox_to_yolo_obb(xml_path, output_dir, class_names):
    """
    Convert one Pascal VOC XML with <robndbox> entries to a YOLO-OBB .txt label.

    Each object becomes one line "class_id x1 y1 x2 y2 x3 y3 x4 y4", the four
    rotated-corner coordinates normalised to [0, 1].

    :param xml_path: pathlib.Path to the source .xml annotation (main() passes
                     a Path; the ``xml_path.name`` access below assumes one)
    :param output_dir: root output directory (labels go to <output_dir>/labels)
    :param class_names: ordered class list; list index == class_id
    """
    try:
        tree = ET.parse(xml_path)
        root = tree.getroot()
        # Image size is required to normalise the corner coordinates.
        width_elem = root.find("size/width")
        height_elem = root.find("size/height")
        if width_elem is None or height_elem is None:
            print(f"❌ 跳过 {xml_path}: 缺少 size/width 或 size/height")
            return
        img_w = int(width_elem.text)
        img_h = int(height_elem.text)
        if img_w == 0 or img_h == 0:
            print(f"❌ 跳过 {xml_path}: 图像尺寸为 0")
            return
        # Output label path mirrors the XML file stem.
        label_file = Path(output_dir) / "labels" / (Path(xml_path).stem + ".txt")
        label_file.parent.mkdir(parents=True, exist_ok=True)
        lines = []
        for obj in root.findall("object"):
            name = obj.find("name").text
            if name not in class_names:
                print(f"⚠️ 跳过未知类别: {name} (文件: {xml_path.name})")
                continue
            class_id = class_names.index(name)
            rb = obj.find("robndbox")
            if rb is None:
                print(f"⚠️ 跳过无 robndbox 的对象: {name}")
                continue
            cx = float(rb.find("cx").text)
            cy = float(rb.find("cy").text)
            w = float(rb.find("w").text)
            h = float(rb.find("h").text)
            # BUGFIX: roLabelImg stores <angle> in RADIANS (the sample XMLs
            # carry values like 3.071593 ≈ pi), so it must NOT be passed
            # through np.radians() as if it were degrees.
            angle_rad = float(rb.find("angle").text)
            cos_a, sin_a = np.cos(angle_rad), np.sin(angle_rad)
            # Corner offsets relative to the box centre, before rotation.
            corners = np.array([
                [-w/2, -h/2],
                [ w/2, -h/2],
                [ w/2,  h/2],
                [-w/2,  h/2]
            ])
            rotation_matrix = np.array([[cos_a, -sin_a], [sin_a, cos_a]])
            rotated_corners = np.dot(corners, rotation_matrix.T) + [cx, cy]
            # Normalise to [0, 1].
            rotated_corners[:, 0] /= img_w
            rotated_corners[:, 1] /= img_h
            # Flatten into the YOLO-OBB label line.
            coords = rotated_corners.flatten()
            line = str(class_id) + " " + " ".join(f"{x:.6f}" for x in coords)
            lines.append(line)
        # Only write a label file when at least one valid object was found.
        if lines:
            with open(label_file, "w", encoding="utf-8") as f:
                f.write("\n".join(lines) + "\n")
            print(f"✅ 已生成: {label_file}")
        else:
            print(f"🟡 无有效标注,跳过生成: {label_file}")
    except Exception as e:
        print(f"❌ 处理 {xml_path} 时出错: {e}")
def main():
    """Batch-convert every roLabelImg XML in XML_DIR to YOLO-OBB labels."""
    # ==================== Configuration ====================
    # ✅ Adjust the paths and class list below.
    XML_DIR = "/home/hx/桌面/ailai_test/train"  # directory containing the .xml files
    OUTPUT_DIR = "yolo_obb_dataset"  # output directory
    CLASS_NAMES = ["ban", "bag"]  # class list; position == class_id
    # =======================================================
    xml_dir = Path(XML_DIR)
    output_dir = Path(OUTPUT_DIR)
    if not xml_dir.exists():
        raise FileNotFoundError(f"未找到 XML 目录: {xml_dir}")
    # Gather all .xml files to convert.
    xml_files = list(xml_dir.glob("*.xml"))
    if not xml_files:
        print(f"⚠️ 在 {xml_dir} 中未找到 .xml 文件")
        return
    print(f"🔍 找到 {len(xml_files)} 个 XML 文件")
    print(f"📦 类别映射: { {i: name for i, name in enumerate(CLASS_NAMES)} }")
    # Batch conversion.
    for xml_file in xml_files:
        robndbox_to_yolo_obb(xml_file, output_dir, CLASS_NAMES)
    print(f"\n🎉 转换完成!标签已保存至: {output_dir / 'labels'}")
if __name__ == "__main__":
    main()

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

View File

@ -0,0 +1,2 @@
0 0.287799 0.126397 0.708264 0.166508 0.698037 0.505318 0.277572 0.465207
1 0.405951 0.167969 0.626046 0.189171 0.618472 0.437663 0.398377 0.416461

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

View File

@ -0,0 +1,2 @@
0 0.287799 0.126397 0.708264 0.166508 0.698037 0.505318 0.277572 0.465207
1 0.405951 0.167969 0.626046 0.189171 0.618472 0.437663 0.398377 0.416461

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

View File

@ -0,0 +1,2 @@
0 0.287799 0.126397 0.708264 0.166508 0.698037 0.505318 0.277572 0.465207
1 0.405951 0.167969 0.626046 0.189171 0.618472 0.437663 0.398377 0.416461

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

View File

@ -0,0 +1,2 @@
0 0.287799 0.126397 0.708264 0.166508 0.698037 0.505318 0.277572 0.465207
1 0.405951 0.167969 0.626046 0.189171 0.618472 0.437663 0.398377 0.416461

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

View File

@ -0,0 +1,2 @@
0 0.287799 0.126397 0.708264 0.166508 0.698037 0.505318 0.277572 0.465207
1 0.405951 0.167969 0.626046 0.189171 0.618472 0.437663 0.398377 0.416461

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

View File

@ -0,0 +1,2 @@
0 0.287799 0.126397 0.708264 0.166508 0.698037 0.505318 0.277572 0.465207
1 0.405951 0.167969 0.626046 0.189171 0.618472 0.437663 0.398377 0.416461

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

View File

@ -0,0 +1,2 @@
0 0.287799 0.126397 0.708264 0.166508 0.698037 0.505318 0.277572 0.465207
1 0.405951 0.167969 0.626046 0.189171 0.618472 0.437663 0.398377 0.416461

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

View File

@ -0,0 +1,2 @@
0 0.287799 0.126397 0.708264 0.166508 0.698037 0.505318 0.277572 0.465207
1 0.405951 0.167969 0.626046 0.189171 0.618472 0.437663 0.398377 0.416461

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

View File

@ -0,0 +1,2 @@
0 0.287799 0.126397 0.708264 0.166508 0.698037 0.505318 0.277572 0.465207
1 0.405951 0.167969 0.626046 0.189171 0.618472 0.437663 0.398377 0.416461

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 MiB

View File

@ -0,0 +1,2 @@
0 0.287799 0.126397 0.708264 0.166508 0.698037 0.505318 0.277572 0.465207
1 0.405951 0.167969 0.626046 0.189171 0.618472 0.437663 0.398377 0.416461

Some files were not shown because too many files have changed in this diff Show More