# zjsh_yolov11/推理图片反向上传CVAT/point/trans_pointtocvat.py


# convert_yolo_to_cvat.py
import os
import xml.etree.ElementTree as ET
import cv2

# Directory containing the input YOLO label (.txt) files
labels_dir = "keypoints_txt"

# Output XML file
output_xml = "annotations_cvat.xml"

# Image directory (images are read to obtain width/height)
images_dir = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251230"

# Mapping from YOLO class id to the label name written into the XML
class_mapping_reverse = {
    0: 'clamp1',
}

def unnormalize_bbox(img_w, img_h, xc, yc, w, h):
    """Convert a normalized YOLO box to absolute corner coordinates.

    Args:
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        xc: Normalized x coordinate of the box center.
        yc: Normalized y coordinate of the box center.
        w: Normalized box width.
        h: Normalized box height.

    Returns:
        A tuple ``(x1, y1, x2, y2)``: the top-left and bottom-right corners
        in pixels.
    """
    # Scale the center to pixels, then step half the box size each way.
    center_x, center_y = xc * img_w, yc * img_h
    half_w = w * img_w / 2
    half_h = h * img_h / 2
    return (
        center_x - half_w,
        center_y - half_h,
        center_x + half_w,
        center_y + half_h,
    )


def unnormalize_points(img_w, img_h, kpts, stride=3):
    """Convert a flat list of normalized keypoints to absolute coordinates.

    ``kpts`` is read in groups of ``stride`` values. The first two values of
    each group are the normalized x and y; any further values in the group
    are ignored (presumably a visibility/confidence flag when ``stride`` is 3
    -- confirm against the label producer).

    Args:
        img_w: Image width in pixels.
        img_h: Image height in pixels.
        kpts: Flat sequence of keypoint values (numbers or numeric strings).
        stride: Number of values stored per keypoint. Defaults to 3
            (``x, y, extra``); pass 2 for labels that store only ``x, y``.

    Returns:
        A list of ``(x, y)`` tuples in pixels, one per keypoint.

    Raises:
        ValueError: If ``stride`` is smaller than 2.
        IndexError: If the last group contains an x value but no y value.
    """
    if stride < 2:
        raise ValueError("stride must be at least 2 (x and y per keypoint)")
    return [
        (float(kpts[i]) * img_w, float(kpts[i + 1]) * img_h)
        for i in range(0, len(kpts), stride)
    ]


# ----------------------- Build the XML tree skeleton -----------------------

root = ET.Element("annotations")
version = ET.SubElement(root, "version")
version.text = "1.1"

meta = ET.SubElement(root, "meta")
task = ET.SubElement(meta, "task")

# Every .txt file in the label directory counts towards the task size,
# even when its image later turns out to be missing.
txt_files = [entry for entry in os.listdir(labels_dir) if entry.endswith(".txt")]

for tag, text in (("name", "converted_from_yolo"), ("size", str(len(txt_files)))):
    ET.SubElement(task, tag).text = text

# Label metadata (included for better compatibility): one <label> per class.
labels_elem = ET.SubElement(task, "labels")
for name in class_mapping_reverse.values():
    lab = ET.SubElement(labels_elem, "label")
    for tag, text in (("name", name), ("color", "#ffffff"), ("type", "any")):
        ET.SubElement(lab, tag).text = text
    ET.SubElement(lab, "attributes")

dumped = ET.SubElement(meta, "dumped")
dumped.text = ""

# ----------------------- Process each txt file (ids follow sorted order) -----------------------
sorted_txts = sorted(txt_files)

for idx, txt_file in enumerate(sorted_txts):
    base = os.path.splitext(txt_file)[0]

    # Find the matching image, trying each supported extension in turn.
    img_path = None
    for ext in (".jpg", ".jpeg", ".png", ".bmp"):
        candidate = os.path.join(images_dir, base + ext)
        if os.path.exists(candidate):
            img_path = candidate
            break

    if img_path is None:
        print(f"❗缺少图片：{base}.* （ID={idx} 被跳过）")
        continue  # no <image> is emitted, but idx is still consumed

    # The image is decoded only to obtain its width and height.
    img = cv2.imread(img_path)
    if img is None:
        print(f"❗无法读取图片：{img_path} （ID={idx} 被跳过）")
        continue

    h, w = img.shape[:2]

    # The enumerate index is the image id, so ids follow the sorted txt order.
    image_elem = ET.SubElement(
        root, "image",
        id=str(idx),
        name=os.path.basename(img_path),
        width=str(w),
        height=str(h)
    )

    # Read the whole label file: YOLO stores one object per line, so every
    # non-empty line becomes its own <points>/<box> pair.
    yolo_path = os.path.join(labels_dir, txt_file)
    with open(yolo_path, "r", encoding="utf-8") as f:
        label_lines = [raw.strip() for raw in f]

    if not any(label_lines):
        # The <image> element is kept, just without any shapes.
        print(f"⚠ 空标签文件：{yolo_path}")
        continue

    for line_no, line in enumerate(label_lines, start=1):
        if not line:
            continue
        parts = line.split()

        # Expected layout: class_id xc yc w h [keypoint values ...]
        try:
            cls_id = int(parts[0])
            xc, yc, bw, bh = map(float, parts[1:5])
            kpts = [float(v) for v in parts[5:]]
            # Undo keypoint normalization.
            pts = unnormalize_points(w, h, kpts)
        except (ValueError, IndexError):
            # One bad line must not abort the whole conversion.
            print(f"⚠ 标签行格式错误，已跳过：{yolo_path}:{line_no}")
            continue

        label_name = class_mapping_reverse.get(cls_id)
        if label_name is None:
            print(f"⚠ 未知类别 ID {cls_id}，已跳过：{yolo_path}:{line_no}")
            continue

        # Undo bbox normalization.
        x1, y1, x2, y2 = unnormalize_bbox(w, h, xc, yc, bw, bh)

        # ---------------- CVAT <points> ----------------
        # Skipped when the line has no keypoints, so no empty "points"
        # attribute is written.
        if pts:
            pts_str = ";".join([f"{x:.2f},{y:.2f}" for x, y in pts])
            ET.SubElement(
                image_elem, "points",
                label=label_name,
                source="manual",
                occluded="0",
                points=pts_str,
                z_order="0"
            )

        # ---------------- CVAT <box> ----------------
        ET.SubElement(
            image_elem, "box",
            label=label_name,
            source="manual",
            occluded="0",
            xtl=f"{x1:.2f}",
            ytl=f"{y1:.2f}",
            xbr=f"{x2:.2f}",
            ybr=f"{y2:.2f}",
            z_order="0"
        )


# ----------------------- Write the XML file -----------------------
tree = ET.ElementTree(root)
tree.write(output_xml, xml_declaration=True, encoding="utf-8")

# Summary for the operator: the output path and how many label files were scanned.
for message in (
    "\n🎉 成功完成 YOLO → CVAT XML 转换！",
    f"📁 输出文件：{output_xml}",
    f"ℹ️  共扫描 {len(sorted_txts)} 个标签文件，按其顺序分配 ID（缺失图片的 ID 已跳过）。",
):
    print(message)