# yolo_detect_to_cvat.py
import os
import xml.etree.ElementTree as ET
from pathlib import Path

import cv2

IMG_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp'}


def _find_image(image_dir, stem):
    """Return the first existing image in *image_dir* matching *stem*, else None.

    Candidates are tried in a fixed order (extensions sorted alphabetically)
    so that the result does not depend on set iteration order when several
    images share a stem. Lower-case extensions are tried before upper-case
    ones, and the stem as given before its upper-cased form.
    """
    for ext in sorted(IMG_EXTENSIONS):
        for suffix in (ext, ext.upper()):
            for name in (stem, stem.upper()):
                candidate = image_dir / f"{name}{suffix}"
                if candidate.exists():
                    return candidate
    return None


def _yolo_to_corners(cx, cy, bw, bh, width, height):
    """Convert a normalised YOLO box to absolute corners clipped to the image.

    Returns ``(xtl, ytl, xbr, ybr)`` in pixels.
    """
    cx_abs = cx * width
    cy_abs = cy * height
    half_w = bw * width / 2
    half_h = bh * height / 2

    def clip(value, upper):
        # Keep the coordinate inside [0, upper] so boxes never leave the image.
        return max(0, min(upper, value))

    return (
        clip(cx_abs - half_w, width),
        clip(cy_abs - half_h, height),
        clip(cx_abs + half_w, width),
        clip(cy_abs + half_h, height),
    )


def _build_task_meta(root, class_id_to_name):
    """Append the ``<meta>`` subtree to *root*.

    Returns the ``<size>`` and ``<stop_frame>`` elements so the caller can
    fill them in once the number of exported images is known.
    """
    meta = ET.SubElement(root, "meta")
    task = ET.SubElement(meta, "task")

    ET.SubElement(task, "id").text = "1"
    ET.SubElement(task, "name").text = "yolo_detect_import"
    size_elem = ET.SubElement(task, "size")
    ET.SubElement(task, "mode").text = "annotation"
    ET.SubElement(task, "overlap").text = "0"
    ET.SubElement(task, "bugtracker").text = ""
    ET.SubElement(task, "created").text = ""
    ET.SubElement(task, "updated").text = ""
    ET.SubElement(task, "subset").text = "default"
    ET.SubElement(task, "start_frame").text = "0"
    stop_elem = ET.SubElement(task, "stop_frame")
    ET.SubElement(task, "frame_filter").text = ""

    labels_elem = ET.SubElement(task, "labels")
    for name in class_id_to_name.values():
        lab = ET.SubElement(labels_elem, "label")
        ET.SubElement(lab, "name").text = name
        ET.SubElement(lab, "color").text = "#ffffff"
        ET.SubElement(lab, "type").text = "any"
        ET.SubElement(lab, "attributes")

    ET.SubElement(meta, "dumped").text = ""
    return size_elem, stop_elem


def yolo_detect_to_cvat_xml(label_dir, image_dir, class_id_to_name, output_xml,
                            *, fallback_size=(1920, 1080)):
    """Convert YOLO Detect labels (``class cx cy w h``) to a CVAT XML file.

    Every ``*.txt`` file in *label_dir* is paired with an image of the same
    stem in *image_dir*; the image size is used to de-normalise the boxes.

    Args:
        label_dir: Directory containing the YOLO ``*.txt`` label files.
        image_dir: Directory containing the corresponding images.
        class_id_to_name: Mapping from integer class id to label name. Ids
            missing from the mapping are written as ``class_<id>``.
        output_xml: Path of the XML file to write.
        fallback_size: ``(width, height)`` used when an image exists but
            cannot be decoded. Pass ``None`` to skip such images instead.

    Label files without a matching image are skipped with a warning, as are
    label lines that do not have five columns or contain non-numeric fields.
    The ``<size>`` / ``<stop_frame>`` meta fields and the image ids reflect
    the images actually written, not the number of label files.
    """
    label_dir = Path(label_dir)
    image_dir = Path(image_dir)

    # ======== Basic XML skeleton ========
    root = ET.Element("annotations")
    ET.SubElement(root, "version").text = "1.1"
    size_elem, stop_elem = _build_task_meta(root, class_id_to_name)

    # ======== One <image> element per label file ========
    image_count = 0
    for txt_file in sorted(label_dir.glob("*.txt")):
        stem = txt_file.stem

        img_path = _find_image(image_dir, stem)
        if img_path is None:
            print(f"⚠ 找不到对应图像: {stem}")
            continue

        # The image is only needed for its size (to de-normalise the boxes).
        img = cv2.imread(str(img_path))
        if img is not None:
            height, width = img.shape[:2]
        elif fallback_size is not None:
            width, height = fallback_size
            print(f"⚠ 无法读取图像: {img_path},使用默认尺寸 {width}x{height}")
        else:
            print(f"⚠ 无法读取图像: {img_path},跳过")
            continue

        image_elem = ET.SubElement(root, "image", {
            "id": str(image_count),
            "name": img_path.name,
            "width": str(width),
            "height": str(height),
        })
        image_count += 1

        with open(txt_file, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue

                parts = line.split()
                if len(parts) != 5:
                    print(f"⚠ 标签格式错误(应为5列): {line} in {txt_file}")
                    continue

                # A single malformed line must not abort the whole export.
                try:
                    cls_id = int(parts[0])
                    cx, cy, bw, bh = map(float, parts[1:])
                except ValueError:
                    print(f"⚠ 标签格式错误(无法解析数值): {line} in {txt_file}")
                    continue

                xtl, ytl, xbr, ybr = _yolo_to_corners(
                    cx, cy, bw, bh, width, height
                )

                # Plain box; no rotation attribute is written.
                ET.SubElement(image_elem, "box", {
                    "label": class_id_to_name.get(cls_id, f"class_{cls_id}"),
                    "source": "manual",
                    "occluded": "0",
                    "xtl": f"{xtl:.2f}",
                    "ytl": f"{ytl:.2f}",
                    "xbr": f"{xbr:.2f}",
                    "ybr": f"{ybr:.2f}",
                    "z_order": "0",
                })

        print(f"✔ 处理 {img_path.name}")

    # Fill in the frame counts now that the number of images is known;
    # stop_frame is clamped so an empty export never yields -1.
    size_elem.text = str(image_count)
    stop_elem.text = str(max(image_count - 1, 0))

    # Write the XML file
    tree = ET.ElementTree(root)
    tree.write(output_xml, encoding="utf-8", xml_declaration=True)
    print(f"\n✅ 已生成 CVAT XML 文件: {output_xml}")


# ------------------- Entry point -------------------
if __name__ == "__main__":
    CLASS_MAP = {
        0: "bag",
        1: "bag35"
    }

    yolo_detect_to_cvat_xml(
        label_dir="/home/hx/yolo/推理图片反向上传CVAT/detect/inference_results/labels",
        image_dir="/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/ailaidete/train/delet",
        class_id_to_name=CLASS_MAP,
        output_xml="detect_annotations.xml"
    )