Files
zjsh_yolov11/tool/del_duoyutxt-image.py

79 lines
2.5 KiB
Python
Raw Permalink Normal View History

2025-12-11 08:37:09 +08:00
import os
from pathlib import Path
# ------------------------------ Configuration ------------------------------
# Directory that is scanned for images and .txt files;
# replace it with your own data directory.
FOLDER_PATH = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/11c"

# File suffixes that are treated as images.
IMG_EXTENSIONS = {
    ".bmp",
    ".jpeg",
    ".jpg",
    ".png",
    ".tif",
    ".tiff",
    ".webp",
}

# True: preview only (nothing is removed); False: really delete the files.
DRY_RUN = False
# ---------------------------------------------------------------------------
def _collect_orphans(folder, img_extensions):
    """Pair images and TXT files in *folder* by stem and find the unpaired ones.

    The scan is non-recursive and compares suffixes case-insensitively, so
    ``a.JPG`` pairs with ``a.txt``.  Every file is tracked (not just one per
    stem), so ``a.jpg`` and ``a.png`` are both reported when ``a.txt`` is
    missing.

    Returns:
        ``(image_count, txt_count, orphan_images, orphan_txts)`` where the two
        counts are numbers of files and the two lists hold ``Path`` objects in
        sorted order.
    """
    wanted = {ext.lower() for ext in img_extensions}
    images = {}  # stem -> list of image paths sharing that stem
    txts = {}    # stem -> list of txt paths sharing that stem

    # sorted() makes the report and the deletion order deterministic.
    for entry in sorted(folder.iterdir()):
        if not entry.is_file():
            continue
        suffix = entry.suffix.lower()
        if suffix in wanted:
            images.setdefault(entry.stem, []).append(entry)
        elif suffix == ".txt":
            txts.setdefault(entry.stem, []).append(entry)

    orphan_images = []
    for stem in sorted(images):
        if stem not in txts:
            orphan_images.extend(images[stem])

    orphan_txts = []
    for stem in sorted(txts):
        if stem not in images:
            orphan_txts.extend(txts[stem])

    image_count = sum(len(paths) for paths in images.values())
    txt_count = sum(len(paths) for paths in txts.values())
    return image_count, txt_count, orphan_images, orphan_txts


def main(folder_path=None, img_extensions=None, dry_run=None):
    """Delete images without a same-named TXT file, and TXT files without an image.

    Args:
        folder_path: Directory to clean. Defaults to the module-level
            ``FOLDER_PATH``.
        img_extensions: Iterable of image suffixes (with the leading dot).
            Defaults to the module-level ``IMG_EXTENSIONS``.
        dry_run: When true, only print what would be deleted. Defaults to the
            module-level ``DRY_RUN``.

    Returns:
        None. Progress and results are printed to stdout; a missing directory
        is reported and the function returns without raising.
    """
    # Resolve defaults at call time so edits to the module constants are honoured.
    if folder_path is None:
        folder_path = FOLDER_PATH
    if img_extensions is None:
        img_extensions = IMG_EXTENSIONS
    if dry_run is None:
        dry_run = DRY_RUN

    folder = Path(folder_path)
    # is_dir() also rejects a path that exists but is a regular file.
    if not folder.is_dir():
        print(f"❌ 目录不存在: {folder}")
        return

    image_count, txt_count, to_delete_imgs, to_delete_txts = _collect_orphans(
        folder, img_extensions
    )

    # Report what was found.
    print("🔍 扫描完成。")
    print(f"🖼️ 图片数量: {image_count}")
    print(f"📄 TXT 数量 : {txt_count}")
    print()

    if to_delete_imgs:
        print("🗑️ 将删除无对应 TXT 的图片:")
        for path in to_delete_imgs:
            print(f" - {path}")
    else:
        print("✅ 所有图片都有对应的 TXT")

    if to_delete_txts:
        print("\n🗑️ 将删除无对应图片的 TXT:")
        for path in to_delete_txts:
            print(f" - {path}")
    else:
        print("✅ 所有 TXT 都有对应的图片")

    total_to_delete = len(to_delete_imgs) + len(to_delete_txts)
    print(f"\n📌 总共将删除 {total_to_delete} 个文件")

    if dry_run:
        print("\n💡 当前为预览模式 (DRY_RUN=True),未执行真实删除。")
        print(" 如需真实删除,请将 DRY_RUN 改为 False 并重新运行。")
        return

    print("\n⚠️ 正在执行删除...")
    failed = 0
    for path in to_delete_imgs + to_delete_txts:
        try:
            path.unlink()
        except OSError as e:
            # Best effort: keep going so one locked file does not stop the cleanup.
            failed += 1
            print(f"❌ 删除失败: {path} - {e}")
        else:
            print(f"✅ 已删除: {path}")

    # Do not claim success when some files could not be removed.
    if failed:
        print(f"⚠️ 清理完成,但有 {failed} 个文件删除失败")
    else:
        print("✅ 清理完成!")
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()