109 lines
4.2 KiB
Python
109 lines
4.2 KiB
Python
|
|
import os
|
|||
|
|
import glob
|
|||
|
|
|
|||
|
|
def is_point_in_bbox(px, py, x_center, y_center, w, h, tolerance=1e-3):
    """Return whether the point (px, py) lies inside a center/size bbox.

    The box is ``w`` wide and ``h`` tall around (x_center, y_center), with
    all values expected in normalized coordinates. Each edge is pushed
    outwards by ``tolerance`` so that a point overshooting the box by a tiny
    amount (e.g. floating-point rounding) is still accepted. Both bounds are
    inclusive.
    """
    half_w = w / 2
    half_h = h / 2

    left = x_center - half_w - tolerance
    right = x_center + half_w + tolerance
    top = y_center - half_h - tolerance
    bottom = y_center + half_h + tolerance

    within_x = left <= px <= right
    within_y = top <= py <= bottom
    return within_x and within_y
|
|||
|
|
|
|||
|
|
def check_txt_file(txt_path, img_dir, delete=True):
    """Decide whether a keypoint label file is invalid and should be removed.

    Each non-empty line is expected to describe one object as
    ``class x_center y_center w h`` followed by three keypoints, each written
    as ``x y v``. ``v`` is the visibility flag and is not inspected; only the
    ``x``/``y`` values are checked. All coordinates are treated as normalized.

    A line is reported as invalid when:
      * a bbox value is out of range (center outside [0, 1], size outside
        (0, 1]),
      * it does not carry exactly 9 keypoint values,
      * any keypoint lies outside its bbox (see ``is_point_in_bbox``), or
      * a field cannot be parsed as a number.

    Lines with fewer than 5 fields (including blank lines) are skipped
    without being reported.

    Args:
        txt_path: Path of the label file to inspect.
        img_dir: Not used by this function; kept so existing callers keep
            working.
        delete: When true, return ``True`` as soon as the first invalid line
            is found. When false, only print the problems found and always
            return ``False``.

    Returns:
        ``True`` if the file should be deleted, ``False`` otherwise
        (including when the file cannot be read).
    """
    try:
        with open(txt_path, 'r') as f:
            lines = f.readlines()
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError raised while decoding.
        print(f"⚠️ 无法读取文件 {txt_path}: {e}")
        return False

    for line_num, line in enumerate(lines, start=1):
        parts = line.split()
        if len(parts) < 5:
            continue  # blank or too-short line: ignored, not reported

        try:
            # The class id is not used, but it must still parse as an integer.
            int(parts[0])
            x_c, y_c, w, h = (float(value) for value in parts[1:5])

            if not (0 <= x_c <= 1 and 0 <= y_c <= 1 and 0 < w <= 1 and 0 < h <= 1):
                print(f"❌ 行 {line_num}: bbox 值非法")
                if delete:
                    return True

            # Three keypoints, three values each: x, y, v.
            keypoints = parts[5:]
            if len(keypoints) != 9:
                print(f"❌ 行 {line_num}: 关键点数量不为 3 (实际: {len(keypoints)//3})")
                if delete:
                    return True
                # Preview mode: the line is already reported. Indexing into a
                # list of the wrong length would either raise IndexError
                # (logged as a bogus parse error) or silently check only the
                # first three of too many keypoints, so move on instead.
                continue

            for i in range(3):
                kx = float(keypoints[i * 3])
                ky = float(keypoints[i * 3 + 1])

                if not is_point_in_bbox(kx, ky, x_c, y_c, w, h):
                    print(f"🚨 行 {line_num}: 关键点 {i+1} ({kx:.4f}, {ky:.4f}) 超出 bbox [{x_c-w/2:.4f}, {x_c+w/2:.4f}] x [{y_c-h/2:.4f}, {y_c+h/2:.4f}]")
                    if delete:
                        return True  # one out-of-box keypoint is enough

        except ValueError as e:
            # int()/float() rejected a field on this line.
            print(f"❌ 行 {line_num} 解析出错: {e}")
            if delete:
                return True

    return False
|
|||
|
|
|
|||
|
|
def _remove_if_exists(path):
    """Delete *path* if it exists, print a confirmation, and return whether it was removed."""
    if not os.path.exists(path):
        return False
    os.remove(path)
    print(f"🗑️ 已删除: {path}")
    return True


def clean_invalid_files(labels_dir, images_dir, delete=True):
    """Check every ``*.txt`` label file in a directory and remove invalid ones.

    For each label file judged invalid by ``check_txt_file``, the label file
    and the ``.jpg`` of the same base name in ``images_dir`` are removed (each
    only if it exists).

    Args:
        labels_dir: Directory that is searched (non-recursively) for ``*.txt``.
        images_dir: Directory holding the matching ``<base name>.jpg`` images.
        delete: When true, invalid files are deleted. When false, nothing is
            deleted; the files that would be deleted are only listed.

    Returns:
        None. Progress and results are printed.
    """
    # Sorted so that the report order is reproducible between runs.
    txt_files = sorted(glob.glob(os.path.join(labels_dir, "*.txt")))
    deleted_count = 0
    flagged_count = 0

    print(f"🔍 开始检查 {len(txt_files)} 个标注文件...")

    for txt_file in txt_files:
        base_name = os.path.splitext(os.path.basename(txt_file))[0]
        img_file = os.path.join(images_dir, base_name + ".jpg")

        # Always ask for the verdict. With delete=False, check_txt_file never
        # returns True, so a preview run could not tell which files are
        # affected. Whether anything is removed is decided here instead.
        if not check_txt_file(txt_file, images_dir, delete=True):
            continue

        flagged_count += 1
        print(f"💥 异常文件: {base_name}.txt")

        if not delete:
            print(f"👀 预览模式,未删除: {txt_file}")
            continue

        try:
            _remove_if_exists(txt_file)
            _remove_if_exists(img_file)
            deleted_count += 1
        except OSError as e:
            print(f"❌ 删除文件时出错: {e}")

    if delete:
        print(f"\n✅ 检查完成!共删除 {deleted_count} 组异常文件 (txt + jpg)")
    else:
        print(f"\n✅ 检查完成!预览模式:发现 {flagged_count} 个异常文件,未删除任何文件")
|
|||
|
|
|
|||
|
|
# =============== User configuration ===============
# These values are the defaults; each can be overridden on the command line.
LABELS_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/yolodataset/point1/labels_keypoints"
IMAGES_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/yolodataset/point1/f11"  # must be the directory that holds the images
DELETE_FILES = True  # set to False to preview which files would be deleted
# ==================================================


def parse_args(argv=None):
    """Parse command-line options, falling back to the constants above.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        A namespace with ``labels_dir``, ``images_dir`` and ``delete``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="检查关键点标注文件,删除关键点超出 bbox 的标注及对应图片"
    )
    parser.add_argument("--labels-dir", default=LABELS_DIR, help="标注 .txt 文件所在目录")
    parser.add_argument("--images-dir", default=IMAGES_DIR, help="图片 .jpg 文件所在目录")

    # --delete and --dry-run write to the same destination, so either one can
    # override whatever DELETE_FILES is set to.
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument("--delete", dest="delete", action="store_true", help="删除异常文件")
    mode.add_argument("--dry-run", dest="delete", action="store_false", help="只预览,不删除任何文件")
    parser.set_defaults(delete=DELETE_FILES)

    return parser.parse_args(argv)


def main(argv=None):
    """Run the cleanup using the configured defaults and any CLI overrides."""
    args = parse_args(argv)
    clean_invalid_files(args.labels_dir, args.images_dir, delete=args.delete)


if __name__ == "__main__":
    main()
|