79 lines
2.5 KiB
Python
79 lines
2.5 KiB
Python
|
|
import os
|
|||
|
|
from pathlib import Path
|
|||
|
|
|
|||
|
|
# ==================== Configuration ====================
# Directory that holds the image / TXT pairs -- replace with your own data
# directory.
FOLDER_PATH = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/11c"

# File suffixes (with the leading dot) that are treated as images.
IMG_EXTENSIONS = {
    ".jpg",
    ".jpeg",
    ".png",
    ".bmp",
    ".tif",
    ".tiff",
    ".webp",
}

# True: preview only; False: really delete files.
# WARNING: the value below makes a run delete files for real.
DRY_RUN = False
# =======================================================
|
|||
|
|
|
|||
|
|
def _group_by_stem(folder, img_extensions):
    """Group the image and ``.txt`` files directly inside *folder* by stem.

    Suffixes are compared case-insensitively, so ``a.JPG`` counts as an image
    even on case-sensitive file systems. Only regular files are considered;
    sub-directories whose names happen to end in a known suffix are skipped.

    Args:
        folder: ``Path`` of the directory to scan (not recursive).
        img_extensions: Iterable of image suffixes including the leading dot.

    Returns:
        A ``(images, labels)`` tuple. Each element is a dict that maps a file
        stem to the list of paths sharing that stem.
    """
    wanted = {ext.lower() for ext in img_extensions}
    images = {}
    labels = {}
    for entry in folder.iterdir():
        if not entry.is_file():
            continue
        suffix = entry.suffix.lower()
        if suffix in wanted:
            # Keep every file per stem: "x.jpg" and "x.png" may coexist.
            images.setdefault(entry.stem, []).append(entry)
        elif suffix == ".txt":
            labels.setdefault(entry.stem, []).append(entry)
    return images, labels


def _print_orphans(paths, heading, all_paired_message):
    """Print *heading* followed by each path, or *all_paired_message* if none."""
    if not paths:
        print(all_paired_message)
        return
    print(heading)
    for path in paths:
        print(f" - {path}")


def main(folder_path=None, *, img_extensions=None, dry_run=None):
    """Delete images without a same-stem TXT file and TXT files without an image.

    The directory is scanned non-recursively. An image and a TXT file form a
    pair when their stems are equal; every file that has no partner is listed
    and, unless running in preview mode, deleted.

    Args:
        folder_path: Directory to clean. Defaults to ``FOLDER_PATH``.
        img_extensions: Suffixes treated as images. Defaults to
            ``IMG_EXTENSIONS``.
        dry_run: When true, only print what would be deleted. Defaults to
            ``DRY_RUN``.

    Returns:
        None. Progress and results are reported on standard output.
    """
    # Module-level settings are only read when the caller passed nothing.
    folder = Path(FOLDER_PATH if folder_path is None else folder_path)
    extensions = IMG_EXTENSIONS if img_extensions is None else img_extensions
    preview_only = DRY_RUN if dry_run is None else dry_run

    # is_dir() also rejects a path that exists but is a regular file.
    if not folder.is_dir():
        print(f"❌ 目录不存在: {folder}")
        return

    images, labels = _group_by_stem(folder, extensions)

    orphan_imgs = sorted(
        path
        for stem, paths in images.items()
        if stem not in labels
        for path in paths
    )
    orphan_txts = sorted(
        path
        for stem, paths in labels.items()
        if stem not in images
        for path in paths
    )

    # Report what was found.
    print("🔍 扫描完成。")
    print(f"🖼️ 图片数量: {sum(len(paths) for paths in images.values())}")
    print(f"📄 TXT 数量 : {sum(len(paths) for paths in labels.values())}")
    print()

    _print_orphans(
        orphan_imgs,
        "🗑️ 将删除无对应 TXT 的图片:",
        "✅ 所有图片都有对应的 TXT",
    )
    _print_orphans(
        orphan_txts,
        "\n🗑️ 将删除无对应图片的 TXT:",
        "✅ 所有 TXT 都有对应的图片",
    )

    print(f"\n📌 总共将删除 {len(orphan_imgs) + len(orphan_txts)} 个文件")

    if preview_only:
        print("\n💡 当前为预览模式 (DRY_RUN=True),未执行真实删除。")
        print(" 如需真实删除,请将 DRY_RUN 改为 False 并重新运行。")
        return

    print("\n⚠️ 正在执行删除...")
    failures = 0
    for path in orphan_imgs + orphan_txts:
        try:
            path.unlink()
        except OSError as err:
            # Best effort: keep going so one locked file does not stop the run.
            failures += 1
            print(f"❌ 删除失败: {path} - {err}")
        else:
            print(f"✅ 已删除: {path}")

    if failures:
        print(f"⚠️ 清理结束,但有 {failures} 个文件删除失败。")
    else:
        print("✅ 清理完成!")
|
|||
|
|
|
|||
|
|
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|