# 112 lines
# 4.7 KiB
# Python
import glob
import math
import os


def _check_label_row(parts, num_keypoints=3):
    """Return a description of the first problem in one label row, or None.

    Args:
        parts: The whitespace-separated fields of a single non-blank line.
        num_keypoints: Number of ``(x, y, v)`` keypoint triplets expected
            after the class id and the four bbox values.

    Returns:
        A short message describing the first failed check, or ``None`` when
        the row passes every check.
    """
    expected_fields = 5 + 3 * num_keypoints  # class_id + bbox(4) + triplets
    if len(parts) != expected_fields:
        return f"字段数错误 ({len(parts)} != {expected_fields})"

    try:
        values = [float(x) for x in parts]
    except ValueError:
        return f"数据格式错误 -> {parts}"

    if any(math.isnan(v) for v in values):
        return "包含 NaN"

    # The class id must be a whole number. is_integer() is False for +/-inf
    # as well, so int() below can never raise OverflowError, and a value
    # such as 0.7 is no longer silently truncated to a valid-looking 0.
    class_value = values[0]
    if not class_value.is_integer():
        return f"class_id {parts[0]} 不是整数"
    cls = int(class_value)
    if cls < 0:
        return f"class_id {cls} < 0"

    # Box centre may touch the borders; width/height must be strictly > 0.
    x_c, y_c, w, h = values[1:5]
    if not (0 <= x_c <= 1 and 0 <= y_c <= 1 and 0 < w <= 1 and 0 < h <= 1):
        return (
            f"bbox 越界 (x_c={x_c:.4f}, y_c={y_c:.4f}, w={w:.4f}, h={h:.4f})"
        )

    kpts = values[5:]
    for i in range(num_keypoints):
        kx, ky, v = kpts[i * 3:i * 3 + 3]
        if not (0 <= kx <= 1 and 0 <= ky <= 1):
            return f"关键点 {i+1} 坐标越界 (kx={kx:.4f}, ky={ky:.4f})"
        if v not in (0, 1, 2):
            return f"关键点 {i+1} v={v},应为 0/1/2"

    return None


def _label_file_is_invalid(txt_file, base_name, num_keypoints=3):
    """Check one label file, print the first problem found, return True if bad.

    A file is reported as invalid when it cannot be read, when it contains no
    non-blank rows, or when any row fails ``_check_label_row``. Checking stops
    at the first failing row.
    """
    try:
        with open(txt_file, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except (OSError, UnicodeError) as e:
        print(f"❌ 无法读取文件: {txt_file} | 错误: {e}")
        return True

    # Keep the 1-based line number of every non-blank row. A file made only
    # of blank lines carries no annotation and is treated like an empty file.
    rows = [
        (line_no, line.split())
        for line_no, line in enumerate(lines, start=1)
        if line.strip()
    ]
    if not rows:
        print(f"❌ 文件为空: {txt_file}")
        return True

    for line_no, parts in rows:
        problem = _check_label_row(parts, num_keypoints)
        if problem is not None:
            print(f"🚨 {base_name}.txt | 第 {line_no} 行: {problem}")
            return True
    return False


def _remove_pair(txt_file, img_file):
    """Delete the label file and its image if they exist, logging each removal."""
    for path in (txt_file, img_file):
        if os.path.exists(path):
            os.remove(path)
            print(f"🗑️ 已删除: {path}")


def validate_and_delete(labels_dir, images_dir, delete=True, num_keypoints=3):
    """Validate keypoint label files and remove invalid label/image pairs.

    Every ``*.txt`` file directly inside ``labels_dir`` is checked:

    - each non-blank line must have ``5 + 3 * num_keypoints`` fields
      (class id, 4 bbox values, then ``x y v`` per keypoint; 14 by default);
    - no field may be NaN, and the class id must be a non-negative integer;
    - bbox centre must lie in [0, 1], bbox width/height in (0, 1];
    - keypoint coordinates must lie in [0, 1] and ``v`` must be 0, 1 or 2;
    - unreadable files and files without any annotation row are invalid.

    For an invalid file, the ``.txt`` file and the image
    ``<images_dir>/<same base name>.jpg`` are deleted.

    Args:
        labels_dir: Directory containing the ``.txt`` label files.
        images_dir: Directory containing the matching ``.jpg`` images.
        delete: When True (default) invalid pairs are deleted. When False
            nothing is removed; the pairs that would be deleted are only
            reported (dry run).
        num_keypoints: Number of keypoints per row (default 3).

    Returns:
        The number of invalid pairs that were deleted, or, in dry-run mode,
        the number of invalid pairs that would have been deleted.
    """
    txt_files = sorted(glob.glob(os.path.join(labels_dir, "*.txt")))
    print(f"🔍 开始检查 {len(txt_files)} 个标注文件...\n")
    handled_count = 0

    for txt_file in txt_files:
        base_name = os.path.splitext(os.path.basename(txt_file))[0]
        img_file = os.path.join(images_dir, base_name + ".jpg")

        if not _label_file_is_invalid(txt_file, base_name, num_keypoints):
            continue

        if not delete:
            # Dry run: honour delete=False instead of removing files anyway.
            print(f"👀 预览(未删除): {txt_file} + {img_file}\n")
            handled_count += 1
            continue

        try:
            _remove_pair(txt_file, img_file)
        except OSError as e:
            # Best effort: report the failure and keep processing other files.
            print(f"❌ 删除文件失败 {txt_file} 或 {img_file}: {e}\n")
        else:
            handled_count += 1
            print(f"💥 已清理: {base_name}\n")

    if delete:
        print(f"\n✅ 检查完成!共删除 {handled_count} 组异常文件 (txt + jpg)")
    else:
        print(f"\n✅ 检查完成!共发现 {handled_count} 组异常文件 (预览模式,未删除)")
    return handled_count
|
||
|
||
|
||
# =============== User configuration ===============
LABELS_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/yolodataset/point1/labels_keypoints"
IMAGES_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/yolodataset/point1/f11"
# Forwarded as the `delete` argument of validate_and_delete; False is
# intended as a preview (dry-run) switch.
DELETE_FILES = True

# Run the clean-up only when this file is executed as a script, so that merely
# importing the module never deletes files as a side effect.
if __name__ == "__main__":
    validate_and_delete(LABELS_DIR, IMAGES_DIR, delete=DELETE_FILES)
# ==================================================