# Listing metadata from the repository web view: 160 lines, 4.4 KiB, Python.
# Revision timestamp shown in the listing: 2025-12-11 08:37:09 +08:00
# convert_yolo_to_cvat.py
import os
import xml.etree.ElementTree as ET
import cv2
# Directory that holds the input YOLO label files (*.txt).
labels_dir = "keypoints_txt"
# Path of the CVAT XML file that the script writes.
output_xml = "annotations_cvat.xml"
# Directory that holds the images (they are opened to obtain width/height).
# NOTE(review): bare timestamp lines ("2026-03-10 13:58:21 +08:00" and
# "2025-12-11 08:37:09 +08:00") surrounded this assignment in the listing;
# they are not valid Python and presumably were blame metadata, so they are
# kept only in this comment.
images_dir = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251230"
# Mapping from YOLO class id to the label name used in the XML output.
class_mapping_reverse = {
    0: 'clamp1',
}
def unnormalize_bbox(img_w, img_h, xc, yc, w, h):
    """Convert a normalized YOLO bbox back to absolute corner coordinates.

    Args:
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        xc: Normalized x coordinate of the box centre.
        yc: Normalized y coordinate of the box centre.
        w: Normalized box width.
        h: Normalized box height.

    Returns:
        A tuple ``(x1, y1, x2, y2)``: the top-left and bottom-right corners
        in pixel units. Values are not clamped to the image bounds.
    """
    cx = xc * img_w
    cy = yc * img_h
    bw = w * img_w
    bh = h * img_h
    x1 = cx - bw / 2
    y1 = cy - bh / 2
    x2 = cx + bw / 2
    y2 = cy + bh / 2
    return x1, y1, x2, y2
def unnormalize_points(img_w, img_h, kpts, stride=3):
    """Recover absolute keypoint coordinates from a flat normalized list.

    ``kpts`` is read in groups of ``stride`` values. The first two values of
    each group are taken as normalized x and y; any further values in the
    group (for the default stride of 3, one extra value) are ignored.

    Args:
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        kpts: Flat sequence of values convertible with ``float``.
        stride: Number of values per keypoint. Defaults to 3; pass 2 for
            labels that store only x and y per keypoint.

    Returns:
        A list of ``(x, y)`` tuples in pixel units, in input order.

    Raises:
        ValueError: If ``stride`` is smaller than 2.
    """
    if stride < 2:
        raise ValueError("stride must be at least 2")
    pts = []
    # Stop one short of the end so a dangling single value at the tail is
    # ignored instead of raising IndexError on the y lookup.
    for i in range(0, len(kpts) - 1, stride):
        x = float(kpts[i]) * img_w
        y = float(kpts[i + 1]) * img_h
        pts.append((x, y))
    return pts
# ----------------------- Build the XML tree skeleton -----------------------
root = ET.Element("annotations")
version = ET.SubElement(root, "version")
version.text = "1.1"
meta = ET.SubElement(root, "meta")
task = ET.SubElement(meta, "task")
ET.SubElement(task, "name").text = "converted_from_yolo"
# <size> is the total number of .txt label files, even when some of them
# have no matching image.
txt_files = [f for f in os.listdir(labels_dir) if f.endswith(".txt")]
ET.SubElement(task, "size").text = str(len(txt_files))
# Label metadata: one <label> entry per configured class name
# (the original note says this is included for better compatibility).
labels_elem = ET.SubElement(task, "labels")
for name in class_mapping_reverse.values():
    lab = ET.SubElement(labels_elem, "label")
    ET.SubElement(lab, "name").text = name
    ET.SubElement(lab, "color").text = "#ffffff"
    ET.SubElement(lab, "type").text = "any"
    ET.SubElement(lab, "attributes")
# <dumped> is written with an empty text value.
ET.SubElement(meta, "dumped").text = ""
# ------- Process every label file (image ids follow the sorted order) -------
sorted_txts = sorted(txt_files)
for idx, txt_file in enumerate(sorted_txts):
    base = os.path.splitext(txt_file)[0]

    # Look for the matching image under each supported extension, in order.
    img_path = None
    for ext in (".jpg", ".jpeg", ".png", ".bmp"):
        candidate = os.path.join(images_dir, base + ext)
        if os.path.exists(candidate):
            img_path = candidate
            break
    if img_path is None:
        print(f"❗缺少图片:{base}.* ID={idx} 被跳过)")
        continue  # no <image> is generated, but idx is still consumed

    # Read the image to obtain its size.
    img = cv2.imread(img_path)
    if img is None:
        print(f"❗无法读取图片:{img_path} ID={idx} 被跳过)")
        continue
    h, w = img.shape[:2]

    # The enumerate index (position in sorted label order) is the image id.
    image_elem = ET.SubElement(
        root, "image",
        id=str(idx),
        name=os.path.basename(img_path),
        width=str(w),
        height=str(h),
    )

    # Read the YOLO label file. Every non-blank line is treated as one
    # object, so files that describe several objects keep all of them
    # instead of only the first line.
    yolo_path = os.path.join(labels_dir, txt_file)
    with open(yolo_path, "r", encoding="utf-8") as f:
        raw_lines = f.read().splitlines()
    label_lines = [
        (line_no, text.split())
        for line_no, text in enumerate(raw_lines, start=1)
        if text.strip()
    ]
    if not label_lines:
        # The (empty) <image> element created above is kept.
        print(f"⚠ 空标签文件:{yolo_path}")
        continue

    for line_no, parts in label_lines:
        # Expected layout: class id, 4 bbox values, then keypoint values.
        if len(parts) < 5:
            print(f"⚠ 标签行格式错误,已跳过:{yolo_path} 第 {line_no} 行")
            continue
        try:
            cls_id = int(parts[0])
            xc, yc, bw, bh = map(float, parts[1:5])
            kpts = [float(value) for value in parts[5:]]
        except ValueError:
            # Warn and skip, in line with how missing images are handled,
            # rather than aborting the whole conversion on one bad line.
            print(f"⚠ 标签行格式错误,已跳过:{yolo_path} 第 {line_no} 行")
            continue

        label_name = class_mapping_reverse.get(cls_id)
        if label_name is None:
            print(f"⚠ 未知类别 {cls_id},已跳过:{yolo_path} 第 {line_no} 行")
            continue

        # Convert the normalized bbox and keypoints to pixel coordinates.
        x1, y1, x2, y2 = unnormalize_bbox(w, h, xc, yc, bw, bh)
        # NOTE(review): only x and y of each keypoint group are used; any
        # third per-keypoint value is not consulted — confirm that is intended.
        pts = unnormalize_points(w, h, kpts)

        # ---------------- CVAT <points> ----------------
        # Emitted only when the line actually carries keypoints, so that
        # no element with an empty "points" attribute is written.
        if pts:
            pts_str = ";".join(f"{x:.2f},{y:.2f}" for x, y in pts)
            ET.SubElement(
                image_elem, "points",
                label=label_name,
                source="manual",
                occluded="0",
                points=pts_str,
                z_order="0",
            )

        # ---------------- CVAT <box> ----------------
        ET.SubElement(
            image_elem, "box",
            label=label_name,
            source="manual",
            occluded="0",
            xtl=f"{x1:.2f}",
            ytl=f"{y1:.2f}",
            xbr=f"{x2:.2f}",
            ybr=f"{y2:.2f}",
            z_order="0",
        )
# ----------------------- Write the XML file -----------------------
# Serialize the assembled tree as UTF-8 with an XML declaration.
tree = ET.ElementTree(root)
tree.write(output_xml, xml_declaration=True, encoding="utf-8")
# Report the result: success banner, output path, number of label files scanned.
summary_lines = (
    "\n🎉 成功完成 YOLO → CVAT XML 转换!",
    f"📁 输出文件:{output_xml}",
    f" 共扫描 {len(sorted_txts)} 个标签文件,按其顺序分配 ID缺失图片的 ID 已跳过)。",
)
print(*summary_lines, sep="\n")