# zjsh_yolov11/推理图片反向上传CVAT/detect/trans_obbtocvat.py

# yolo_detect_to_cvat.py
import os
import xml.etree.ElementTree as ET
from pathlib import Path
import cv2

# Lower-case image suffixes (leading dot included) that are probed when a label
# file is paired with the image of the same stem.
IMG_EXTENSIONS = {
    ".jpg",
    ".jpeg",
    ".png",
    ".bmp",
    ".tif",
    ".tiff",
    ".webp",
}


def _append_task_meta(root, total, class_id_to_name):
    """Append the ``<meta><task>...</task></meta>`` header to *root*.

    *total* is the number of label files; it is written as the task size and
    used to derive ``stop_frame``.
    """
    meta = ET.SubElement(root, "meta")
    task = ET.SubElement(meta, "task")

    task_fields = (
        ("id", "1"),
        ("name", "yolo_detect_import"),
        ("size", str(total)),
        ("mode", "annotation"),
        ("overlap", "0"),
        ("bugtracker", ""),
        ("created", ""),
        ("updated", ""),
        ("subset", "default"),
        ("start_frame", "0"),
        # Clamp so an empty label directory does not yield stop_frame == -1.
        ("stop_frame", str(max(total - 1, 0))),
        ("frame_filter", ""),
    )
    for tag, text in task_fields:
        ET.SubElement(task, tag).text = text

    # One <label> entry per class name, in the mapping's iteration order.
    labels_elem = ET.SubElement(task, "labels")
    for name in class_id_to_name.values():
        lab = ET.SubElement(labels_elem, "label")
        ET.SubElement(lab, "name").text = name
        ET.SubElement(lab, "color").text = "#ffffff"
        ET.SubElement(lab, "type").text = "any"
        ET.SubElement(lab, "attributes")

    ET.SubElement(meta, "dumped").text = ""


def _find_image(image_dir, stem):
    """Return the first existing image in *image_dir* for *stem*, or ``None``.

    Tries the stem as-is and upper-cased, each with the lower- and upper-case
    form of every suffix in ``IMG_EXTENSIONS``.
    """
    # Sorted so the pick is reproducible when several candidates exist
    # (iteration order of a set of strings can differ between runs).
    for ext in sorted(IMG_EXTENSIONS):
        for name in (stem, stem.upper()):
            for suffix in (ext, ext.upper()):
                candidate = image_dir / f"{name}{suffix}"
                if candidate.exists():
                    return candidate
    return None


def _yolo_fields_to_box(parts, width, height):
    """Turn ``[class, cx, cy, w, h]`` strings into ``(cls_id, xtl, ytl, xbr, ybr)``.

    The normalized centre/size values are scaled by *width*/*height* and the
    resulting corners are clipped to the image rectangle.

    Raises:
        ValueError: if the class id is not an integer literal or a coordinate
            is not a valid float.
    """
    cls_id = int(parts[0])
    cx, cy, bw, bh = map(float, parts[1:])

    # De-normalize to pixels.
    cx_abs = cx * width
    cy_abs = cy * height
    w_abs = bw * width
    h_abs = bh * height

    # Centre/size -> top-left and bottom-right corners.
    xtl = cx_abs - w_abs / 2
    ytl = cy_abs - h_abs / 2
    xbr = cx_abs + w_abs / 2
    ybr = cy_abs + h_abs / 2

    # Clip to the image so boxes never extend past its borders.
    xtl = max(0, min(width, xtl))
    ytl = max(0, min(height, ytl))
    xbr = max(0, min(width, xbr))
    ybr = max(0, min(height, ybr))
    return cls_id, xtl, ytl, xbr, ybr


def yolo_detect_to_cvat_xml(label_dir, image_dir, class_id_to_name, output_xml):
    """Convert YOLO detect labels (``class cx cy w h``) into a CVAT XML file.

    Every ``*.txt`` file in *label_dir* is paired with an image of the same
    stem in *image_dir*; each valid label line becomes an axis-aligned
    ``<box>`` (no rotation attribute) under that image's ``<image>`` element.

    Args:
        label_dir: Directory containing the YOLO ``.txt`` label files.
        image_dir: Directory containing the corresponding images.
        class_id_to_name: Mapping from integer class id to label name. Ids
            missing from the mapping are written as ``class_<id>``.
        output_xml: Path of the XML file to write.

    Label files whose image is missing or cannot be decoded are skipped with
    a warning, as are label lines that do not have exactly five columns or
    whose fields are not numeric.
    """
    label_dir = Path(label_dir)
    image_dir = Path(image_dir)

    txt_files = sorted(label_dir.glob("*.txt"))

    # ======== Basic XML skeleton ========
    root = ET.Element("annotations")
    ET.SubElement(root, "version").text = "1.1"
    _append_task_meta(root, len(txt_files), class_id_to_name)

    # ======== One <image> element per label file ========
    for idx, txt_file in enumerate(txt_files):
        stem = txt_file.stem

        img_path = _find_image(image_dir, stem)
        if img_path is None:
            print(f"⚠ 找不到对应图像: {stem}")
            continue

        # The image size is required to de-normalize the coordinates. Without
        # it any box would be a guess, so the image is really skipped (as the
        # warning says) instead of assuming a default resolution.
        img = cv2.imread(str(img_path))
        if img is None:
            print(f"⚠ 无法读取图像: {img_path}，跳过")
            continue
        H, W = img.shape[:2]

        image_elem = ET.SubElement(root, "image", {
            "id": str(idx),
            "name": img_path.name,
            "width": str(W),
            "height": str(H)
        })

        with open(txt_file, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                parts = line.split()
                if len(parts) != 5:
                    print(f"⚠ 标签格式错误（应为5列）: {line} in {txt_file}")
                    continue

                try:
                    cls_id, xtl, ytl, xbr, ybr = _yolo_fields_to_box(parts, W, H)
                except ValueError:
                    # One malformed line must not abort the whole export.
                    print(f"⚠ 标签数值无法解析: {line} in {txt_file}")
                    continue

                # Plain axis-aligned box: no rotation attribute is emitted.
                ET.SubElement(image_elem, "box", {
                    "label": class_id_to_name.get(cls_id, f"class_{cls_id}"),
                    "source": "manual",
                    "occluded": "0",
                    "xtl": f"{xtl:.2f}",
                    "ytl": f"{ytl:.2f}",
                    "xbr": f"{xbr:.2f}",
                    "ybr": f"{ybr:.2f}",
                    "z_order": "0"
                })

        print(f"✔ 处理 {img_path.name}")

    # Write the XML document.
    tree = ET.ElementTree(root)
    tree.write(output_xml, encoding="utf-8", xml_declaration=True)
    print(f"\n✅ 已生成 CVAT XML 文件: {output_xml}")


# ------------------- Main entry point -------------------
if __name__ == "__main__":
    # Class id -> CVAT label name for this conversion run.
    CLASS_MAP = {0: "bag", 1: "bag35"}

    # Hard-coded locations for this run: the YOLO label files and the images
    # they were produced from.
    labels_path = "/home/hx/yolo/推理图片反向上传CVAT/detect/inference_results/labels"
    images_path = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/ailaidete/train/delet"

    yolo_detect_to_cvat_xml(
        label_dir=labels_path,
        image_dir=images_path,
        class_id_to_name=CLASS_MAP,
        output_xml="detect_annotations.xml",
    )