# File-viewer metadata: Python, 109 lines, 4.2 KiB
import os
|
||
import glob
|
||
|
||
def is_point_in_bbox(px, py, x_center, y_center, w, h, tolerance=1e-3):
    """Tell whether the point (px, py) lies inside a centre/size bounding box.

    The point and the box are expected to use the same (normalised)
    coordinate system. The box is widened by ``tolerance`` on every side so
    that tiny overshoots, e.g. from floating-point rounding, still count as
    inside.

    Args:
        px: X coordinate of the point.
        py: Y coordinate of the point.
        x_center: X coordinate of the box centre.
        y_center: Y coordinate of the box centre.
        w: Box width.
        h: Box height.
        tolerance: Extra margin added to each side of the box.

    Returns:
        True when the point is within the widened box (edges included),
        False otherwise.
    """
    half_w = w / 2
    half_h = h / 2

    # Widened box edges along each axis.
    left = x_center - half_w - tolerance
    right = x_center + half_w + tolerance
    top = y_center - half_h - tolerance
    bottom = y_center + half_h + tolerance

    inside_x = left <= px <= right
    inside_y = top <= py <= bottom
    return inside_x and inside_y
|
||
|
||
def check_txt_file(txt_path, img_dir, delete=True):
    """Decide whether a keypoint label file is invalid and should be removed.

    Each line is expected to describe one object as
    ``class x_center y_center w h`` followed by exactly 3 keypoints written
    as ``x y v`` triples. Only the keypoint ``x``/``y`` values are inspected;
    the visibility flag ``v`` is ignored. A line is reported as a problem when

    - its bbox values are out of range (centre in [0, 1], size in (0, 1]),
    - it does not carry exactly 9 keypoint values,
    - a keypoint lies outside its own bbox (see ``is_point_in_bbox``), or
    - one of its values cannot be parsed as a number.

    Lines with fewer than 5 fields (including blank lines) are skipped
    silently. Every problem is printed. This function never deletes anything
    itself; it only returns a verdict for the caller to act on.

    Args:
        txt_path: Path of the label file to inspect.
        img_dir: Image directory. Not used by this function; kept so that
            existing callers keep working.
        delete: When True, stop at the first problem and return True. When
            False (preview), problems are only printed, one report per
            malformed line, and the result is always False.

    Returns:
        True if ``delete`` is set and the file contains at least one problem;
        False otherwise, including when the file cannot be read.
    """
    try:
        with open(txt_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (OSError, ValueError) as e:
        # ValueError also covers undecodable bytes (UnicodeDecodeError).
        print(f"⚠️ 无法读取文件 {txt_path}: {e}")
        return False

    for line_no, line in enumerate(lines, start=1):
        parts = line.split()
        if len(parts) < 5:
            continue  # blank or too short to hold a bbox: skipped, not flagged

        try:
            # The class id is not used further, but it must parse as an int.
            int(parts[0])
            x_c, y_c, w, h = (float(value) for value in parts[1:5])

            if not (0 <= x_c <= 1 and 0 <= y_c <= 1 and 0 < w <= 1 and 0 < h <= 1):
                print(f"❌ 行 {line_no}: bbox 值非法")
                if delete:
                    return True
                # Preview mode: comparing keypoints against an illegal bbox
                # would only add misleading follow-up messages.
                continue

            keypoints = parts[5:]
            if len(keypoints) != 9:
                print(f"❌ 行 {line_no}: 关键点数量不为 3 (实际: {len(keypoints)//3})")
                if delete:
                    return True
                # Preview mode: indexing a list of the wrong length below
                # would raise or silently check the wrong values.
                continue

            # NOTE(review): keypoints are checked regardless of their
            # visibility flag. If this dataset stores unlabeled keypoints as
            # "0 0 0", such lines are flagged as well -- confirm that this is
            # intended before deleting files.
            for i in range(3):
                kx = float(keypoints[i * 3])
                ky = float(keypoints[i * 3 + 1])

                if not is_point_in_bbox(kx, ky, x_c, y_c, w, h):
                    print(f"🚨 行 {line_no}: 关键点 {i+1} ({kx:.4f}, {ky:.4f}) 超出 bbox [{x_c-w/2:.4f}, {x_c+w/2:.4f}] x [{y_c-h/2:.4f}, {y_c+h/2:.4f}]")
                    if delete:
                        return True  # one stray keypoint is enough
        except ValueError as e:
            # Raised by int()/float() on a malformed field.
            print(f"❌ 行 {line_no} 解析出错: {e}")
            if delete:
                return True

    return False
|
||
|
||
def clean_invalid_files(labels_dir, images_dir, delete=True):
    """Check every ``*.txt`` label file in a directory and remove invalid ones.

    A label file is judged by ``check_txt_file``. For each invalid file the
    label itself and the image ``<same base name>.jpg`` in ``images_dir`` are
    removed, if they exist. Progress and a final summary are printed.

    Args:
        labels_dir: Directory that is searched (non-recursively) for
            ``*.txt`` label files.
        images_dir: Directory that holds the matching ``.jpg`` images.
        delete: When True, invalid label/image pairs are removed. When False
            (preview), nothing is removed; the files that would be removed
            are listed, with the first problem found in each.

    Returns:
        None.
    """
    # Sorted so that the report order is reproducible between runs.
    txt_files = sorted(glob.glob(os.path.join(labels_dir, "*.txt")))
    flagged_count = 0

    print(f"🔍 开始检查 {len(txt_files)} 个标注文件...")

    for txt_file in txt_files:
        base_name = os.path.splitext(os.path.basename(txt_file))[0]
        img_file = os.path.join(images_dir, base_name + ".jpg")

        # check_txt_file only returns True when it is called with delete=True,
        # and it never removes anything itself. Always ask it for a verdict
        # and gate the actual removal here, so that preview mode can name the
        # affected files.
        if not check_txt_file(txt_file, images_dir, delete=True):
            continue

        print(f"💥 异常文件: {base_name}.txt")

        if not delete:
            print(f"👀 预览模式, 未删除: {txt_file}")
            flagged_count += 1
            continue

        try:
            for path in (txt_file, img_file):
                if os.path.exists(path):
                    os.remove(path)
                    print(f"🗑️ 已删除: {path}")
            # Counted only when no removal raised.
            flagged_count += 1
        except OSError as e:
            print(f"❌ 删除文件时出错: {e}")

    if delete:
        print(f"\n✅ 检查完成!共删除 {flagged_count} 组异常文件 (txt + jpg)")
    else:
        print(f"\n✅ 检查完成!预览模式: 共发现 {flagged_count} 组异常文件, 未删除任何文件")
|
||
|
||
# =============== User configuration ===============
# Directory that holds the keypoint label files (*.txt).
LABELS_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/yolodataset/point1/labels_keypoints"
# Directory that holds the images; make sure it really is the image folder.
IMAGES_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/yolodataset/point1/f11"
# Set to False for a dry run that removes nothing.
DELETE_FILES = True
# ==================================================

if __name__ == "__main__":
    clean_invalid_files(
        labels_dir=LABELS_DIR,
        images_dir=IMAGES_DIR,
        delete=DELETE_FILES,
    )