import glob
import os


def is_point_in_bbox(px, py, x_center, y_center, w, h, tolerance=1e-3):
    """Return True if the point (px, py) lies inside the given bbox.

    The bbox is described by its center (x_center, y_center) and its full
    width/height (w, h). Each edge is pushed outwards by ``tolerance`` so
    that tiny overshoots (e.g. floating-point rounding) still count as inside.
    """
    x_min = x_center - w / 2 - tolerance
    x_max = x_center + w / 2 + tolerance
    y_min = y_center - h / 2 - tolerance
    y_max = y_center + h / 2 + tolerance
    return x_min <= px <= x_max and y_min <= py <= y_max


def check_txt_file(txt_path, img_dir, delete=True):
    """Check one label file and report whether it should be deleted.

    Each non-empty line with at least five fields is expected to hold
    ``class x_center y_center w h`` followed by exactly three keypoints,
    each written as ``x y v``. Only x and y of a keypoint are examined;
    the visibility value ``v`` is ignored.

    A line is a violation when the bbox values are out of range, the number
    of keypoint values is not 9, a keypoint lies outside its bbox, or a
    field cannot be parsed as a number. Every violation is printed.

    NOTE(review): keypoints are checked regardless of their visibility flag.
    If the dataset writes unlabeled keypoints as ``0 0 0``, such files will
    be flagged - confirm this is intended.

    Args:
        txt_path: Path of the label file to check.
        img_dir: Not used by this function; kept for interface compatibility.
        delete: When True, return True as soon as the first violation is
            found. When False, violations are only printed and the function
            always returns False.

    Returns:
        True if ``delete`` is True and a violation was found, otherwise
        False. An unreadable file also yields False. This function never
        removes anything itself.
    """
    try:
        with open(txt_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (OSError, UnicodeError) as e:
        print(f"⚠️ 无法读取文件 {txt_path}: {e}")
        return False

    for line_num, line in enumerate(lines):
        parts = line.strip().split()
        if not parts or len(parts) < 5:
            continue  # skip blank or too-short lines

        try:
            # Parse the bbox; the class id is parsed only to validate it.
            int(parts[0])
            x_c = float(parts[1])
            y_c = float(parts[2])
            w = float(parts[3])
            h = float(parts[4])

            if not (0 <= x_c <= 1 and 0 <= y_c <= 1 and 0 < w <= 1 and 0 < h <= 1):
                print(f"❌ 行 {line_num+1}: bbox 值非法")
                if delete:
                    return True

            # Keypoints come in groups of three values: x, y, v.
            keypoints = parts[5:]
            if len(keypoints) != 9:
                print(f"❌ 行 {line_num+1}: 关键点数量不为 3 (实际: {len(keypoints)//3})")
                if delete:
                    return True
                # Report-only mode: the triples cannot be indexed reliably,
                # so move on instead of running into an IndexError below.
                continue

            for i in range(3):
                kx = float(keypoints[i * 3])
                ky = float(keypoints[i * 3 + 1])
                if not is_point_in_bbox(kx, ky, x_c, y_c, w, h):
                    print(f"🚨 行 {line_num+1}: 关键点 {i+1} ({kx:.4f}, {ky:.4f}) 超出 bbox [{x_c-w/2:.4f}, {x_c+w/2:.4f}] x [{y_c-h/2:.4f}, {y_c+h/2:.4f}]")
                    if delete:
                        return True  # one out-of-box keypoint is enough

        except ValueError as e:
            print(f"❌ 行 {line_num+1} 解析出错: {e}")
            if delete:
                return True

    return False  # nothing to delete


def clean_invalid_files(labels_dir, images_dir, delete=True):
    """Check every ``*.txt`` file in labels_dir and remove the invalid ones.

    For each label file flagged by ``check_txt_file`` the label file and the
    image ``<images_dir>/<same base name>.jpg`` are removed.

    Args:
        labels_dir: Directory containing the ``.txt`` label files.
        images_dir: Directory containing the matching ``.jpg`` images.
        delete: When True, flagged files are removed. When False (preview
            mode), flagged files are only listed and nothing is removed.
    """
    txt_files = sorted(glob.glob(os.path.join(labels_dir, "*.txt")))
    deleted_count = 0
    flagged_count = 0

    print(f"🔍 开始检查 {len(txt_files)} 个标注文件...")

    for txt_file in txt_files:
        base_name = os.path.splitext(os.path.basename(txt_file))[0]
        img_file = os.path.join(images_dir, base_name + ".jpg")

        # check_txt_file only reports a violation through its return value
        # when delete=True, so always ask for that and decide here whether
        # anything is actually removed.
        if not check_txt_file(txt_file, images_dir, delete=True):
            continue

        flagged_count += 1
        print(f"💥 异常文件: {base_name}.txt")

        if not delete:
            print(f"👀 预览模式,未删除: {txt_file}")
            continue

        try:
            if os.path.exists(txt_file):
                os.remove(txt_file)
                print(f"🗑️ 已删除: {txt_file}")
            if os.path.exists(img_file):
                os.remove(img_file)
                print(f"🗑️ 已删除: {img_file}")
            deleted_count += 1
        except OSError as e:
            print(f"❌ 删除文件时出错: {e}")

    if delete:
        print(f"\n✅ 检查完成!共删除 {deleted_count} 组异常文件 (txt + jpg)")
    else:
        print(f"\n✅ 检查完成!预览模式下发现 {flagged_count} 组异常文件 (未删除)")


# =============== User configuration ===============
LABELS_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/yolodataset/point1/labels_keypoints"
IMAGES_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/yolodataset/point1/f11"  # make sure this is the directory holding the images
DELETE_FILES = True  # set to False to preview which files would be deleted
# ==================================================

if __name__ == "__main__":
    clean_invalid_files(LABELS_DIR, IMAGES_DIR, delete=DELETE_FILES)