import glob
import os
from typing import List, Optional

# One label line = 1 class id + 4 bbox values + 3 keypoints * (x, y, v).
_FIELDS_PER_LINE = 14
_NUM_KEYPOINTS = 3
_VISIBILITY_VALUES = (0, 1, 2)


def _line_error(parts: List[str]) -> Optional[str]:
    """Return a description of the first problem in one label line, or None.

    ``parts`` is the whitespace-split, non-empty content of a single line.
    The returned text is the part of the log message that follows the
    line-number prefix.
    """
    if len(parts) != _FIELDS_PER_LINE:
        return f"字段数错误 ({len(parts)} != 14)"

    try:
        values = [float(x) for x in parts]
    except ValueError:
        return f"数据格式错误 -> {parts}"

    # NaN is the only float that is not equal to itself.
    if any(x != x for x in values):
        return "包含 NaN"

    # Reject non-integer ids (0.7, inf) before int(): int(inf) raises
    # OverflowError and int(0.7) would silently truncate to a valid-looking 0.
    raw_cls = values[0]
    if not raw_cls.is_integer():
        return f"class_id {raw_cls} 不是整数"
    cls = int(raw_cls)
    if cls < 0:
        return f"class_id {cls} < 0"

    x_c, y_c, w, h = values[1:5]
    # Centre may touch the border; width/height must be strictly positive.
    if not (0 <= x_c <= 1 and 0 <= y_c <= 1 and 0 < w <= 1 and 0 < h <= 1):
        return (
            f"bbox 越界 (x_c={x_c:.4f}, y_c={y_c:.4f}, "
            f"w={w:.4f}, h={h:.4f})"
        )

    for i in range(_NUM_KEYPOINTS):
        kx, ky, v = values[5 + 3 * i:8 + 3 * i]
        if not (0 <= kx <= 1 and 0 <= ky <= 1):
            return f"关键点 {i+1} 坐标越界 (kx={kx:.4f}, ky={ky:.4f})"
        if v not in _VISIBILITY_VALUES:
            return f"关键点 {i+1} v={v},应为 0/1/2"

    return None


def _label_file_error(txt_file: str) -> Optional[str]:
    """Return the full log message for the first problem in a label file.

    Returns None when the file is readable, non-empty and every non-blank
    line passes ``_line_error``.
    """
    base_name = os.path.splitext(os.path.basename(txt_file))[0]

    try:
        with open(txt_file, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (OSError, UnicodeError) as e:
        return f"❌ 无法读取文件: {txt_file} | 错误: {e}"

    if not lines:
        return f"❌ 文件为空: {txt_file}"

    for line_idx, line in enumerate(lines):
        parts = line.strip().split()
        if not parts:
            continue  # blank lines are skipped, not treated as errors
        problem = _line_error(parts)
        if problem is not None:
            return f"🚨 {base_name}.txt | 第 {line_idx+1} 行: {problem}"

    return None


def _remove_pair(txt_file: str, img_file: str) -> bool:
    """Delete the label file and its image if present; return True on success."""
    try:
        if os.path.exists(txt_file):
            os.remove(txt_file)
            print(f"🗑️ 已删除: {txt_file}")
        if os.path.exists(img_file):
            os.remove(img_file)
            print(f"🗑️ 已删除: {img_file}")
    except OSError as e:
        print(f"❌ 删除文件失败 {txt_file} 或 {img_file}: {e}\n")
        return False
    return True


def validate_and_delete(labels_dir, images_dir, delete=True):
    """Validate every ``*.txt`` label file in ``labels_dir``.

    A label file is invalid when it cannot be read, is empty, or contains a
    non-blank line that violates any of these rules:

    - exactly 14 whitespace-separated numeric fields (1 + 4 + 9);
    - no NaN values;
    - class id is a non-negative integer;
    - bbox centre in [0, 1], bbox width/height in (0, 1];
    - each of the 3 keypoints has x, y in [0, 1] and v in {0, 1, 2}.

    Args:
        labels_dir: Directory containing the ``.txt`` label files.
        images_dir: Directory containing the images; the image for
            ``name.txt`` is looked up as ``name.jpg``.
        delete: When True, remove each invalid label file together with its
            image. When False, only report what would be removed (dry run).

    Returns:
        None. Progress and results are printed to stdout.
    """
    txt_files = sorted(glob.glob(os.path.join(labels_dir, "*.txt")))
    print(f"🔍 开始检查 {len(txt_files)} 个标注文件...\n")

    affected_count = 0

    for txt_file in txt_files:
        message = _label_file_error(txt_file)
        if message is None:
            continue
        print(message)

        base_name = os.path.splitext(os.path.basename(txt_file))[0]
        img_file = os.path.join(images_dir, base_name + ".jpg")

        if not delete:
            # Dry run: honour delete=False by reporting only.
            print(f"📝 [预览] 将删除: {txt_file}")
            if os.path.exists(img_file):
                print(f"📝 [预览] 将删除: {img_file}")
            print()
            affected_count += 1
            continue

        if _remove_pair(txt_file, img_file):
            affected_count += 1
            print(f"💥 已清理: {base_name}\n")

    if delete:
        print(f"\n✅ 检查完成!共删除 {affected_count} 组异常文件 (txt + jpg)")
    else:
        print(f"\n✅ 检查完成!共发现 {affected_count} 组异常文件 (预览模式,未删除)")


# =============== User configuration ===============
LABELS_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/yolodataset/point1/labels_keypoints"
IMAGES_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/yolodataset/point1/f11"
DELETE_FILES = True  # set to False to preview which files would be deleted

# Entry point: this call is what actually runs the check (it also runs if
# the file is imported).
validate_and_delete(LABELS_DIR, IMAGES_DIR, delete=DELETE_FILES)
# ==================================================