zjsh_yolov11/yolo11_detect/trans_detecttocvat.py

import os
import xml.etree.ElementTree as ET
from pathlib import Path


def yolo_detect_to_cvat(
    images_dir,
    labels_dir,
    output_xml,
    class_id_to_name,
):
    """
    将 YOLO Detect 格式转换为 CVAT XML（目标检测 box）

    Args:
        images_dir (str): 图片目录
        labels_dir (str): YOLO txt 标注目录
        output_xml (str): 输出 CVAT XML 路径
        class_id_to_name (dict): {0: "xxx", 1: "yyy"}
    """

    images_dir = Path(images_dir)
    labels_dir = Path(labels_dir)

    # ----------------------------
    # 创建 XML 结构
    # ----------------------------
    annotations = ET.Element("annotations")

    version = ET.SubElement(annotations, "version")
    version.text = "1.1"

    # labels
    meta = ET.SubElement(annotations, "meta")
    task = ET.SubElement(meta, "task")
    labels_elem = ET.SubElement(task, "labels")

    for class_id, name in class_id_to_name.items():
        label = ET.SubElement(labels_elem, "label")
        name_elem = ET.SubElement(label, "name")
        name_elem.text = name

    image_id = 0

    # ----------------------------
    # 遍历图片
    # ----------------------------
    for img_path in sorted(images_dir.iterdir()):
        if img_path.suffix.lower() not in [".jpg", ".png", ".jpeg"]:
            continue

        import cv2
        img = cv2.imread(str(img_path))
        if img is None:
            print(f"⚠️ 无法读取图片: {img_path}")
            continue

        height, width = img.shape[:2]

        image_elem = ET.SubElement(
            annotations,
            "image",
            {
                "id": str(image_id),
                "name": img_path.name,
                "width": str(width),
                "height": str(height),
            }
        )

        label_txt = labels_dir / f"{img_path.stem}.txt"

        # ----------------------------
        # 有 / 无标签都要建 image
        # ----------------------------
        if label_txt.exists():
            with open(label_txt, "r") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue

                    class_id, xc, yc, w, h = map(float, line.split())
                    class_id = int(class_id)

                    label_name = class_id_to_name.get(class_id)
                    if label_name is None:
                        print(f"⚠️ 未知 class_id: {class_id}")
                        continue

                    # YOLO → CVAT 坐标
                    box_w = w * width
                    box_h = h * height
                    xtl = xc * width - box_w / 2
                    ytl = yc * height - box_h / 2
                    xbr = xtl + box_w
                    ybr = ytl + box_h

                    ET.SubElement(
                        image_elem,
                        "box",
                        {
                            "label": label_name,
                            "xtl": f"{xtl:.2f}",
                            "ytl": f"{ytl:.2f}",
                            "xbr": f"{xbr:.2f}",
                            "ybr": f"{ybr:.2f}",
                            "occluded": "0",
                            "source": "manual",
                        }
                    )

        image_id += 1
        print(f"✅ {img_path.name}")

    # ----------------------------
    # 写 XML
    # ----------------------------
    tree = ET.ElementTree(annotations)
    tree.write(output_xml, encoding="utf-8", xml_declaration=True)

    print(f"\n🎉 转换完成，CVAT XML 已生成：{output_xml}")

if __name__ == "__main__":
    IMAGES_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/detect/1"
    LABELS_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/detect/1"
    OUTPUT_XML = "annotations.xml"

    # ⚠️ 一定要和 YOLO 训练时一致
    CLASS_ID_TO_NAME = {
        #0: "hole",
        #1: "crack"
        0: "bag"
    }

    yolo_detect_to_cvat(
        images_dir=IMAGES_DIR,
        labels_dir=LABELS_DIR,
        output_xml=OUTPUT_XML,
        class_id_to_name=CLASS_ID_TO_NAME
    )