增加cvat反向上传

2025-12-16 15:00:24 +08:00
parent 8b263167f8
commit 032479f558
16 changed files with 783 additions and 1766 deletions
--- a/推理图片反向上传CVAT/detect/trans_obbtocvat.py
+++ b/推理图片反向上传CVAT/detect/trans_obbtocvat.py
@ -0,0 +1,152 @@
+# trans_obbtocvat.py - convert YOLO Detect labels (class cx cy w h) to CVAT XML
+import os
+import xml.etree.ElementTree as ET
+from pathlib import Path
+import cv2
+
+IMG_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp'}
+
+
+def yolo_detect_to_cvat_xml(label_dir, image_dir, class_id_to_name, output_xml):
+    """
+    Convert YOLO Detect labels (``class cx cy w h``, normalized) to one CVAT XML file.
+
+    Every ``*.txt`` file in ``label_dir`` is paired with the image of the same
+    stem in ``image_dir``. The image extension is matched case-insensitively
+    and an upper-cased stem is tried as a fallback. Label files whose image is
+    missing or unreadable are skipped with a warning, because normalized
+    coordinates cannot be turned into pixels without the real image size.
+
+    Args:
+        label_dir: Directory containing the YOLO ``.txt`` label files.
+        image_dir: Directory containing the corresponding images.
+        class_id_to_name: Mapping from integer class id to CVAT label name.
+            Ids missing from the mapping are exported as ``class_<id>``.
+        output_xml: Path of the XML file to write (UTF-8, with XML declaration).
+    """
+    label_dir = Path(label_dir)
+    image_dir = Path(image_dir)
+
+    txt_files = sorted(label_dir.glob("*.txt"))
+    images_by_stem = _index_images(image_dir)
+
+    # Label names declared in <meta>; fallback names are added as they appear
+    # so that every label referenced by a <box> is also declared.
+    label_names = dict.fromkeys(class_id_to_name.values())
+
+    # <image> nodes are built detached first: the number of exported frames
+    # (needed for <size>/<stop_frame>) is only known after skipping bad inputs.
+    image_elems = []
+    for txt_file in txt_files:
+        stem = txt_file.stem
+
+        img_path = images_by_stem.get(stem)
+        if img_path is None:
+            img_path = images_by_stem.get(stem.upper())
+        if img_path is None:
+            print(f"⚠ 找不到对应图像: {stem}")
+            continue
+
+        # The real image size is required to de-normalize the coordinates.
+        img = cv2.imread(str(img_path))
+        if img is None:
+            print(f"⚠ 无法读取图像: {img_path}，跳过")
+            continue
+        H, W = img.shape[:2]
+
+        # Ids are assigned from the exported images so they stay contiguous.
+        image_elem = ET.Element("image", {
+            "id": str(len(image_elems)),
+            "name": img_path.name,
+            "width": str(W),
+            "height": str(H)
+        })
+
+        with open(txt_file, "r", encoding="utf-8") as f:
+            for raw_line in f:
+                line = raw_line.strip()
+                if not line:
+                    continue
+                parts = line.split()
+                if len(parts) != 5:
+                    print(f"⚠ 标签格式错误（应为5列）: {line} in {txt_file}")
+                    continue
+
+                try:
+                    cls_id, xtl, ytl, xbr, ybr = _yolo_fields_to_box(parts, W, H)
+                except ValueError:
+                    # One malformed line must not abort the whole export.
+                    print(f"⚠ 标签数值无法解析: {line} in {txt_file}")
+                    continue
+
+                label = class_id_to_name.get(cls_id, f"class_{cls_id}")
+                label_names.setdefault(label)
+
+                # Axis-aligned box: no rotation attribute is written.
+                ET.SubElement(image_elem, "box", {
+                    "label": label,
+                    "source": "manual",
+                    "occluded": "0",
+                    "xtl": f"{xtl:.2f}",
+                    "ytl": f"{ytl:.2f}",
+                    "xbr": f"{xbr:.2f}",
+                    "ybr": f"{ybr:.2f}",
+                    "z_order": "0"
+                })
+
+        image_elems.append(image_elem)
+        print(f"✔ 处理 {img_path.name}")
+
+    total = len(image_elems)
+
+    # ======== Assemble the XML document ========
+    root = ET.Element("annotations")
+    ET.SubElement(root, "version").text = "1.1"
+
+    meta = ET.SubElement(root, "meta")
+    task = ET.SubElement(meta, "task")
+
+    ET.SubElement(task, "id").text = "1"
+    ET.SubElement(task, "name").text = "yolo_detect_import"
+    ET.SubElement(task, "size").text = str(total)
+    ET.SubElement(task, "mode").text = "annotation"
+    ET.SubElement(task, "overlap").text = "0"
+    ET.SubElement(task, "bugtracker").text = ""
+    ET.SubElement(task, "created").text = ""
+    ET.SubElement(task, "updated").text = ""
+    ET.SubElement(task, "subset").text = "default"
+    ET.SubElement(task, "start_frame").text = "0"
+    # Never emit a negative frame index when nothing was exported.
+    ET.SubElement(task, "stop_frame").text = str(max(total - 1, 0))
+    ET.SubElement(task, "frame_filter").text = ""
+
+    labels_elem = ET.SubElement(task, "labels")
+    for name in label_names:
+        lab = ET.SubElement(labels_elem, "label")
+        ET.SubElement(lab, "name").text = name
+        ET.SubElement(lab, "color").text = "#ffffff"
+        ET.SubElement(lab, "type").text = "any"
+        ET.SubElement(lab, "attributes")
+
+    ET.SubElement(meta, "dumped").text = ""
+
+    root.extend(image_elems)
+
+    tree = ET.ElementTree(root)
+    tree.write(output_xml, encoding="utf-8", xml_declaration=True)
+    print(f"\n✅ 已生成 CVAT XML 文件: {output_xml}")
+
+
+def _index_images(image_dir):
+    """Map stem -> path for each supported image file directly inside image_dir."""
+    index = {}
+    if not image_dir.is_dir():
+        return index
+    # Sorted so the choice is deterministic when several files share a stem.
+    for path in sorted(image_dir.iterdir()):
+        if path.is_file() and path.suffix.lower() in IMG_EXTENSIONS:
+            index.setdefault(path.stem, path)
+    return index
+
+
+def _yolo_fields_to_box(parts, W, H):
+    """Turn five YOLO fields into (cls_id, xtl, ytl, xbr, ybr) in pixels, clipped to the image.
+
+    Raises ValueError if a field is not numeric.
+    """
+    cls_id = int(parts[0])
+    cx, cy, bw, bh = map(float, parts[1:])
+
+    # De-normalize to absolute pixels.
+    cx_abs, cy_abs = cx * W, cy * H
+    half_w, half_h = bw * W / 2, bh * H / 2
+
+    # Corners, clipped so the box never leaves the image.
+    xtl = max(0, min(W, cx_abs - half_w))
+    ytl = max(0, min(H, cy_abs - half_h))
+    xbr = max(0, min(W, cx_abs + half_w))
+    ybr = max(0, min(H, cy_abs + half_h))
+    return cls_id, xtl, ytl, xbr, ybr
+
+
+# ------------------- Script entry point -------------------
+if __name__ == "__main__":
+    # YOLO class id -> CVAT label name.
+    CLASS_MAP = {0: "hole", 1: "crack"}
+
+    # Arguments for the conversion, gathered in one place.
+    conversion_args = {
+        "label_dir": "/home/hx/yolo/推理图片反向上传CVAT/detect/inference_results/labels",
+        "image_dir": "/home/hx/开发/ML_xiantiao/class_xiantiao_pc/test_image/train",
+        "class_id_to_name": CLASS_MAP,
+        "output_xml": "detect_annotations.xml",
+    }
+    yolo_detect_to_cvat_xml(**conversion_args)
--- a/推理图片反向上传CVAT/detect/tuili_save_txt_f.py
+++ b/推理图片反向上传CVAT/detect/tuili_save_txt_f.py
@ -0,0 +1,135 @@
+import os
+import cv2
+from pathlib import Path
+from ultralytics import YOLO
+
+IMG_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp'}
+
+
+class ObjectDetector:
+    """Wrapper around a YOLO object-detection model."""
+
+    def __init__(self, model_path):
+        """Load the model at ``model_path``.
+
+        Raises:
+            FileNotFoundError: If ``model_path`` does not exist.
+        """
+        model_missing = not os.path.exists(model_path)
+        if model_missing:
+            raise FileNotFoundError(f"模型文件不存在: {model_path}")
+        self.model = YOLO(model_path)
+        print(f"[INFO] 成功加载 YOLO 目标检测模型: {model_path}")
+
+    def detect(self, img_np, conf_threshold=0.0):
+        """Run prediction on ``img_np`` with ``conf_threshold`` as the confidence cut-off.
+
+        Returns:
+            A list with one dict per detected box, holding ``bbox_xyxy``
+            ([x1, y1, x2, y2]), ``confidence`` (float) and ``class_id`` (int).
+        """
+        predictions = self.model.predict(img_np, conf=conf_threshold, verbose=False)
+        return [
+            {
+                'bbox_xyxy': box.xyxy[0],  # [x1, y1, x2, y2]
+                'confidence': float(box.conf.item()),
+                'class_id': int(box.cls.item()),
+            }
+            for prediction in predictions
+            for box in prediction.boxes.cpu().numpy()
+        ]
+
+
+def save_yolo_detect_labels_from_folder(
+        model_path,
+        image_dir,
+        output_dir,
+        conf_threshold=0.5,
+        label_map=None  # optional and currently unused; kept for interface compatibility
+):
+    """
+    Run YOLO Detect inference on every image in ``image_dir`` and save YOLO labels.
+
+    For each image, only the highest-confidence box of each class is kept.
+    One ``<stem>.txt`` file per image is written to ``output_dir/labels``;
+    images without detections (or unreadable images) get an empty file.
+
+    YOLO format: <class_id> <cx_norm> <cy_norm> <w_norm> <h_norm>
+
+    Args:
+        model_path: Path to the YOLO weights file.
+        image_dir: Directory containing the images to process.
+        output_dir: Directory under which the ``labels`` folder is created.
+        conf_threshold: Minimum confidence passed to the detector.
+        label_map: Optional class-id -> name mapping; not used by this function.
+    """
+    image_dir = Path(image_dir)
+    output_dir = Path(output_dir)
+    labels_dir = output_dir / "labels"
+    labels_dir.mkdir(parents=True, exist_ok=True)
+
+    # Regular files with a supported extension (matched case-insensitively);
+    # directories that merely look like image names are ignored.
+    image_files = [
+        f for f in sorted(os.listdir(image_dir))
+        if os.path.splitext(f.lower())[1] in IMG_EXTENSIONS
+        and (image_dir / f).is_file()
+    ]
+    if not image_files:
+        print(f"❌ 未在 {image_dir} 中找到支持的图像文件")
+        return
+
+    print(f"共找到 {len(image_files)} 张图像，开始推理...")
+    detector = ObjectDetector(model_path)
+
+    for img_filename in image_files:
+        img_path = image_dir / img_filename
+        stem = Path(img_filename).stem
+        txt_path = labels_dir / f"{stem}.txt"
+
+        img = cv2.imread(str(img_path))
+        if img is None:
+            print(f"⚠️ 跳过无效图像: {img_path}")
+            txt_path.write_text("")  # still leave an (empty) label file
+            continue
+
+        H, W = img.shape[:2]
+
+        # All boxes with confidence >= conf_threshold.
+        all_detections = detector.detect(img, conf_threshold=conf_threshold)
+
+        # Keep only the most confident box of each class.
+        best_per_class = {}
+        for det in all_detections:
+            cls_id = det['class_id']
+            if cls_id not in best_per_class or det['confidence'] > best_per_class[cls_id]['confidence']:
+                best_per_class[cls_id] = det
+
+        lines = []
+        for det in best_per_class.values():
+            line = _xyxy_to_yolo_line(det['class_id'], det['bbox_xyxy'], W, H)
+            if line is not None:
+                lines.append(line)
+
+        # An empty file is written when there is nothing to report.
+        with open(txt_path, "w", encoding="utf-8") as f:
+            if lines:
+                f.write("\n".join(lines) + "\n")
+
+        print(f"✅ {img_filename} -> {len(lines)} 个检测框已保存")
+
+    print(f"\n🎉 全部完成！标签文件保存在: {labels_dir}")
+
+
+def _xyxy_to_yolo_line(class_id, xyxy, W, H):
+    """Format a pixel-space [x1, y1, x2, y2] box as a normalized YOLO label line.
+
+    The corners are clipped to the image BEFORE the centre and size are
+    computed; clamping centre and size independently would shift or stretch a
+    box that overhangs the image border. Returns None for a box with no area
+    left after clipping.
+    """
+    x1, y1, x2, y2 = (float(v) for v in xyxy)
+    x1, x2 = (max(0.0, min(float(W), v)) for v in (x1, x2))
+    y1, y2 = (max(0.0, min(float(H), v)) for v in (y1, y2))
+
+    bw = x2 - x1
+    bh = y2 - y1
+    if bw <= 0 or bh <= 0:
+        return None
+
+    cx_norm = (x1 + x2) / 2.0 / W
+    cy_norm = (y1 + y2) / 2.0 / H
+    w_norm = bw / W
+    h_norm = bh / H
+    return f"{class_id} {cx_norm:.6f} {cy_norm:.6f} {w_norm:.6f} {h_norm:.6f}"
+
+
+# ------------------- Script entry point -------------------
+if __name__ == "__main__":
+    # Weights, input images and output location for this run.
+    MODEL_PATH = "/home/hx/yolo/ultralytics_yolo11-main/runs/train/exp_detect/weights/best.pt"
+    IMAGE_DIR = "/home/hx/开发/ML_xiantiao/class_xiantiao_pc/test_image/train"
+    OUTPUT_DIR = "./inference_results"
+
+    save_yolo_detect_labels_from_folder(MODEL_PATH, IMAGE_DIR, OUTPUT_DIR, conf_threshold=0.5)
--- a/推理图片反向上传CVAT/point/point_test.py
+++ b/推理图片反向上传CVAT/point/point_test.py
@ -5,7 +5,7 @@ import os

 # ====================== 用户配置 ======================
 MODEL_PATH = 'point.pt'
-IMAGE_SOURCE_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251208'
+IMAGE_SOURCE_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251212'
 OUTPUT_DIR = './keypoints_txt'

 IMG_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.tif', '.webp'}
--- a/推理图片反向上传CVAT/point/trans_pointtocvat.py
+++ b/推理图片反向上传CVAT/point/trans_pointtocvat.py
@ -11,7 +11,7 @@ labels_dir = "keypoints_txt"
 output_xml = "annotations_cvat.xml"

 # 图片目录（用于 width/height）
-images_dir = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251208"
+images_dir = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251212"

 # 类别映射
 class_mapping_reverse = {