# convert_yolo_to_cvat.py
"""Convert YOLO keypoint label files into a single CVAT-style XML file.

Every ``*.txt`` file in ``labels_dir`` is matched to an image with the same
base name in ``images_dir``.  Each non-empty label line is expected to look
like ``cls xc yc w h k1x k1y k1v k2x k2y k2v ...`` with coordinates
normalized to the image size.  For every such line one ``points`` element and
one ``box`` element are appended to the image element of the output XML.

Image ids follow the sorted order of the label files; a label file whose
image is missing or unreadable is skipped, so its id is simply not used.
"""
import os
import xml.etree.ElementTree as ET

# Directory containing the YOLO label files.
labels_dir = "keypoints_txt"

# Output XML file.
output_xml = "annotations_cvat.xml"

# Image directory (images are only opened to obtain width/height).
images_dir = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251230"

# Class id -> label name.
class_mapping_reverse = {
    0: 'clamp1',
}

# Image extensions tried, in this order, when looking up a label's image.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp")


def unnormalize_bbox(img_w, img_h, xc, yc, w, h):
    """Convert a normalized center/size bbox to absolute corner coordinates.

    Args:
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        xc, yc: Normalized box center.
        w, h: Normalized box width and height.

    Returns:
        Tuple ``(x1, y1, x2, y2)``: top-left and bottom-right corners in
        pixels (floats, not clamped to the image).
    """
    cx = xc * img_w
    cy = yc * img_h
    bw = w * img_w
    bh = h * img_h
    return cx - bw / 2, cy - bh / 2, cx + bw / 2, cy + bh / 2


def unnormalize_points(img_w, img_h, kpts):
    """Convert a flat list of normalized keypoints to absolute coordinates.

    ``kpts`` is read in steps of three values; the first two of each step are
    taken as normalized x and y, the third (presumably the YOLO visibility
    flag) is ignored.  A dangling trailing value that has no matching y is
    ignored instead of raising ``IndexError``.

    Args:
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        kpts: Flat sequence of values convertible with ``float()``.

    Returns:
        List of ``(x, y)`` tuples in pixels.
    """
    # Stopping at len - 1 guarantees that kpts[i + 1] exists for every i.
    return [
        (float(kpts[i]) * img_w, float(kpts[i + 1]) * img_h)
        for i in range(0, len(kpts) - 1, 3)
    ]


def _find_image(directory, base):
    """Return the first existing ``directory/base + ext`` path, or None."""
    for ext in IMAGE_EXTENSIONS:
        candidate = os.path.join(directory, base + ext)
        if os.path.exists(candidate):
            return candidate
    return None


def _read_image_size(img_path):
    """Return ``(width, height)`` of the image, or None if it cannot be read."""
    # Imported here so the pure coordinate helpers above stay importable on
    # machines without OpenCV installed.
    import cv2

    img = cv2.imread(img_path)
    if img is None:
        return None
    height, width = img.shape[:2]
    return width, height


def _parse_yolo_line(line):
    """Split one label line into ``(cls_id, (xc, yc, w, h), kpts)``.

    Raises:
        ValueError: If the line has fewer than five fields or a field is not
            numeric.
    """
    parts = line.split()
    if len(parts) < 5:
        raise ValueError(f"expected at least 5 fields, got {len(parts)}")
    cls_id = int(parts[0])
    xc, yc, bw, bh = map(float, parts[1:5])
    kpts = [float(value) for value in parts[5:]]
    return cls_id, (xc, yc, bw, bh), kpts


def _build_root(task_size):
    """Create the ``annotations`` root element with its ``meta`` section."""
    root = ET.Element("annotations")
    ET.SubElement(root, "version").text = "1.1"

    meta = ET.SubElement(root, "meta")
    task = ET.SubElement(meta, "task")
    ET.SubElement(task, "name").text = "converted_from_yolo"
    # size stays the total number of label files, even if some have no image.
    ET.SubElement(task, "size").text = str(task_size)

    # Label metadata (added for better compatibility).
    labels_elem = ET.SubElement(task, "labels")
    for name in class_mapping_reverse.values():
        lab = ET.SubElement(labels_elem, "label")
        ET.SubElement(lab, "name").text = name
        ET.SubElement(lab, "color").text = "#ffffff"
        ET.SubElement(lab, "type").text = "any"
        ET.SubElement(lab, "attributes")

    ET.SubElement(meta, "dumped").text = ""
    return root


def _append_shapes(image_elem, label_name, bbox, pts):
    """Append the ``points`` and ``box`` elements for one labelled object."""
    # Skip the points element entirely when the line carried no keypoints,
    # rather than writing an empty points attribute.
    if pts:
        pts_str = ";".join([f"{x:.2f},{y:.2f}" for x, y in pts])
        ET.SubElement(
            image_elem,
            "points",
            label=label_name,
            source="manual",
            occluded="0",
            points=pts_str,
            z_order="0",
        )

    x1, y1, x2, y2 = bbox
    ET.SubElement(
        image_elem,
        "box",
        label=label_name,
        source="manual",
        occluded="0",
        xtl=f"{x1:.2f}",
        ytl=f"{y1:.2f}",
        xbr=f"{x2:.2f}",
        ybr=f"{y2:.2f}",
        z_order="0",
    )


def main():
    """Run the conversion using the module-level configuration."""
    txt_files = sorted(f for f in os.listdir(labels_dir) if f.endswith(".txt"))
    root = _build_root(len(txt_files))

    # Ids are assigned from the sorted label-file order.
    for idx, txt_file in enumerate(txt_files):
        base = os.path.splitext(txt_file)[0]

        img_path = _find_image(images_dir, base)
        if img_path is None:
            # No image element is generated, but idx is still consumed.
            print(f"❗缺少图片:{base}.* (ID={idx} 被跳过)")
            continue

        size = _read_image_size(img_path)
        if size is None:
            print(f"❗无法读取图片:{img_path} (ID={idx} 被跳过)")
            continue
        w, h = size

        image_elem = ET.SubElement(
            root,
            "image",
            id=str(idx),
            name=os.path.basename(img_path),
            width=str(w),
            height=str(h),
        )

        # Read every non-empty label line (one object per line), keeping the
        # 1-based line number for diagnostics.
        yolo_path = os.path.join(labels_dir, txt_file)
        with open(yolo_path, "r", encoding="utf-8") as f:
            records = [
                (line_no, text.strip())
                for line_no, text in enumerate(f, start=1)
                if text.strip()
            ]

        if not records:
            # The image element is kept, just without any shapes.
            print(f"⚠ 空标签文件:{yolo_path}")
            continue

        for line_no, line in records:
            try:
                cls_id, (xc, yc, bw, bh), kpts = _parse_yolo_line(line)
            except ValueError as err:
                print(f"⚠ 跳过无法解析的标签行:{yolo_path}:{line_no} ({err})")
                continue

            label_name = class_mapping_reverse.get(cls_id)
            if label_name is None:
                print(f"⚠ 跳过未知类别 {cls_id}:{yolo_path}:{line_no}")
                continue

            bbox = unnormalize_bbox(w, h, xc, yc, bw, bh)
            pts = unnormalize_points(w, h, kpts)
            _append_shapes(image_elem, label_name, bbox, pts)

    # Write the XML file.
    tree = ET.ElementTree(root)
    tree.write(output_xml, encoding="utf-8", xml_declaration=True)

    print("\n🎉 成功完成 YOLO → CVAT XML 转换!")
    print(f"📁 输出文件:{output_xml}")
    print(f"ℹ️ 共扫描 {len(txt_files)} 个标签文件,按其顺序分配 ID(缺失图片的 ID 已跳过)。")


if __name__ == "__main__":
    main()