import os
from pathlib import Path

# ==================== Configuration ====================
# Directory that holds the images and their same-stem .txt files.
FOLDER_PATH = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/11c"  # Replace with your data directory
# File suffixes (lower-case, with leading dot) that count as images.
IMG_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp'}
DRY_RUN = False  # True: preview only; False: actually delete files
# =======================================================


def _collect_by_stem(folder):
    """Group the regular files directly inside *folder* by stem.

    Returns a pair ``(images, texts)``. Each is a dict mapping a file stem
    to the list of paths sharing that stem. Suffixes are compared
    case-insensitively, so ``a.JPG`` counts as an image and ``a.TXT`` as a
    TXT file. Sub-directories and other non-regular entries are ignored.
    """
    images = {}
    texts = {}
    for entry in folder.iterdir():
        # Skip directories (even ones named like "foo.txt"); they must never
        # be scheduled for deletion.
        if not entry.is_file():
            continue
        suffix = entry.suffix.lower()
        if suffix in IMG_EXTENSIONS:
            # Keep every path per stem so that a.jpg and a.png are both
            # tracked instead of one silently overwriting the other.
            images.setdefault(entry.stem, []).append(entry)
        elif suffix == ".txt":
            texts.setdefault(entry.stem, []).append(entry)
    return images, texts


def _orphans(candidates, partners):
    """Return, sorted, every path in *candidates* whose stem is not a key of *partners*."""
    return sorted(
        path
        for stem, paths in candidates.items()
        if stem not in partners
        for path in paths
    )


def main() -> None:
    """Delete images without a same-stem TXT file and TXT files without an image.

    Scans ``FOLDER_PATH`` (non-recursively), prints a report of the unpaired
    files, and removes them unless ``DRY_RUN`` is true. Deletion failures are
    reported per file and do not abort the run.
    """
    folder = Path(FOLDER_PATH)
    # is_dir() also rejects a path that exists but is a regular file.
    if not folder.is_dir():
        print(f"❌ 目录不存在: {folder}")
        return

    img_files, txt_files = _collect_by_stem(folder)

    to_delete_imgs = _orphans(img_files, txt_files)
    to_delete_txts = _orphans(txt_files, img_files)

    # Report the scan result. Counts are per file, not per stem.
    print("🔍 扫描完成。")
    print(f"🖼️ 图片数量: {sum(len(paths) for paths in img_files.values())}")
    print(f"📄 TXT 数量 : {sum(len(paths) for paths in txt_files.values())}")
    print()

    if to_delete_imgs:
        print("🗑️ 将删除无对应 TXT 的图片:")
        for f in to_delete_imgs:
            print(f" - {f}")
    else:
        print("✅ 所有图片都有对应的 TXT")

    if to_delete_txts:
        print("\n🗑️ 将删除无对应图片的 TXT:")
        for f in to_delete_txts:
            print(f" - {f}")
    else:
        print("✅ 所有 TXT 都有对应的图片")

    total_to_delete = len(to_delete_imgs) + len(to_delete_txts)
    print(f"\n📌 总共将删除 {total_to_delete} 个文件")

    if DRY_RUN:
        print("\n💡 当前为预览模式 (DRY_RUN=True),未执行真实删除。")
        print(" 如需真实删除,请将 DRY_RUN 改为 False 并重新运行。")
        return

    print("\n⚠️ 正在执行删除...")
    for f in to_delete_imgs + to_delete_txts:
        try:
            f.unlink()
        except OSError as e:
            # Best effort: report the failure and continue with the rest.
            print(f"❌ 删除失败: {f} - {e}")
        else:
            print(f"✅ 已删除: {f}")
    print("✅ 清理完成!")


if __name__ == "__main__":
    main()