112 lines
4.7 KiB
Python
112 lines
4.7 KiB
Python
|
|
import os
|
|||
|
|
import glob
|
|||
|
|
|
|||
|
|
def _check_label_line(parts):
    """Validate the whitespace-split fields of a single label line.

    The expected layout is 14 numeric fields: ``class_id``, a bounding box
    ``(x_c, y_c, w, h)``, then three keypoints of ``(kx, ky, v)`` each.

    Args:
        parts: Non-empty list of field strings taken from one line.

    Returns:
        A short description of the first problem found, or ``None`` when the
        line is valid.
    """
    if len(parts) != 14:
        return f"字段数错误 ({len(parts)} != 14)"

    try:
        values = [float(x) for x in parts]
    except ValueError:
        return f"数据格式错误 -> {parts}"

    # NaN is the only float value that is not equal to itself.
    if any(x != x for x in values):
        return "包含 NaN"

    class_id = values[0]
    # Reject fractional ids instead of truncating them: int(-0.5) == 0 would
    # slip past the sign check. is_integer() is also False for +/-inf, so the
    # int() conversion below can never raise OverflowError.
    if not class_id.is_integer():
        return f"class_id {parts[0]} 不是整数"
    if class_id < 0:
        return f"class_id {int(class_id)} < 0"

    x_c, y_c, w, h = values[1:5]
    # The centre may lie on the border; width and height must be positive.
    if not (0 <= x_c <= 1 and 0 <= y_c <= 1 and 0 < w <= 1 and 0 < h <= 1):
        return (
            f"bbox 越界 (x_c={x_c:.4f}, y_c={y_c:.4f}, "
            f"w={w:.4f}, h={h:.4f})"
        )

    keypoints = values[5:]
    for idx in range(3):
        kx, ky, v = keypoints[idx * 3:idx * 3 + 3]
        if not (0 <= kx <= 1 and 0 <= ky <= 1):
            return f"关键点 {idx+1} 坐标越界 (kx={kx:.4f}, ky={ky:.4f})"
        if v not in (0, 1, 2):
            return f"关键点 {idx+1} v={v},应为 0/1/2"

    return None


def _find_label_problem(txt_file, base_name):
    """Return a printable report of the first problem in a label file.

    Args:
        txt_file: Path of the label file to inspect.
        base_name: File name without extension, used in the report text.

    Returns:
        The report string, or ``None`` when the file is readable, non-empty
        and every non-blank line passes ``_check_label_line``.
    """
    try:
        with open(txt_file, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (OSError, UnicodeError) as e:
        return f"❌ 无法读取文件: {txt_file} | 错误: {e}"

    if not lines:
        return f"❌ 文件为空: {txt_file}"

    for line_no, line in enumerate(lines, start=1):
        parts = line.split()
        if not parts:
            continue  # blank lines are ignored
        problem = _check_label_line(parts)
        if problem is not None:
            return f"🚨 {base_name}.txt | 第 {line_no} 行: {problem}"

    return None


def validate_and_delete(labels_dir, images_dir, delete=True):
    """Check every ``.txt`` label file in a directory and remove bad pairs.

    A label file is considered invalid when it cannot be read, is empty, or
    contains a non-blank line that breaks any of these rules:

    - exactly 14 numeric fields (1 class id + 4 bbox + 3 * 3 keypoint values)
    - no NaN values
    - class id is a non-negative integer
    - bbox centre in [0, 1], bbox width/height in (0, 1]
    - keypoint coordinates in [0, 1]
    - keypoint visibility ``v`` is 0, 1 or 2

    Checking of a file stops at its first problem, which is printed.

    Args:
        labels_dir: Directory holding the ``*.txt`` label files.
        images_dir: Directory holding the matching ``<name>.jpg`` images.
        delete: When true (default), remove each invalid label file together
            with its image. When false, only report what would be removed and
            leave every file in place.

    Returns:
        None. Progress and results are reported through ``print``.
    """
    # Escape the directory so characters such as "[" in its name are matched
    # literally instead of being interpreted as glob syntax.
    pattern = os.path.join(glob.escape(labels_dir), "*.txt")
    txt_files = sorted(glob.glob(pattern))
    print(f"🔍 开始检查 {len(txt_files)} 个标注文件...\n")
    deleted_count = 0
    preview_count = 0

    for txt_file in txt_files:
        base_name = os.path.splitext(os.path.basename(txt_file))[0]
        img_file = os.path.join(images_dir, base_name + ".jpg")

        problem = _find_label_problem(txt_file, base_name)
        if problem is None:
            continue
        print(problem)

        if not delete:
            # Preview mode: list the files that would go, touch nothing.
            print(f"👀 [预览] 将删除: {txt_file}")
            if os.path.exists(img_file):
                print(f"👀 [预览] 将删除: {img_file}")
            print()
            preview_count += 1
            continue

        try:
            if os.path.exists(txt_file):
                os.remove(txt_file)
                print(f"🗑️ 已删除: {txt_file}")
            if os.path.exists(img_file):
                os.remove(img_file)
                print(f"🗑️ 已删除: {img_file}")
            deleted_count += 1
            print(f"💥 已清理: {base_name}\n")
        except OSError as e:
            # Keep going: one undeletable file must not stop the whole pass.
            print(f"❌ 删除文件失败 {txt_file} 或 {img_file}: {e}\n")

    if delete:
        print(f"\n✅ 检查完成!共删除 {deleted_count} 组异常文件 (txt + jpg)")
    else:
        print(f"\n✅ 检查完成!预览模式:发现 {preview_count} 组异常文件,未删除任何文件")
|
|||
|
|
|
|||
|
|
|
|||
|
|
# =============== User configuration ===============
# Directory containing the keypoint label files (*.txt).
LABELS_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/yolodataset/point1/labels_keypoints"
# Directory containing the images that belong to those labels (*.jpg).
IMAGES_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/yolodataset/point1/f11"
# Forwarded as the `delete` argument; set to False to request a preview.
DELETE_FILES = True

# The check is started here, at module level, so it runs as soon as this
# file is executed or imported.
validate_and_delete(
    labels_dir=LABELS_DIR,
    images_dir=IMAGES_DIR,
    delete=DELETE_FILES,
)
# ==================================================
|