import os
import shutil
from typing import Optional, Tuple

import cv2

# ====================== User configuration ======================
# Folder containing the original images.
SOURCE_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/zjdata15/2"
# Folder that receives copies of the images flagged as blurry.
# NOTE(review): currently identical to SOURCE_DIR, so flagged images are
# already in place and nothing is copied -- confirm this is intended.
OUTPUT_DIR = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/zjdata15/2"
# Blur threshold; larger is stricter (100~300).
BLUR_THRESHOLD = 180
# ================================================================

IMG_EXT = [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"]

os.makedirs(OUTPUT_DIR, exist_ok=True)


def is_blurry(
    image_path: str, threshold: float = 150
) -> Tuple[bool, Optional[float]]:
    """Detect blur using the variance of the Laplacian.

    Args:
        image_path: Path of the image file to evaluate.
        threshold: Variance below which the image is reported as blurry.

    Returns:
        A ``(is_blurry, variance)`` pair. When the image cannot be read the
        pair is ``(False, None)``.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # Read failure: still return a pair so callers can always unpack
        # the result; a ``None`` score marks the image as unreadable.
        return False, None

    variance = float(cv2.Laplacian(img, cv2.CV_64F).var())
    return variance < threshold, variance


def scan_and_filter() -> None:
    """Score every image in SOURCE_DIR and copy blurry ones to OUTPUT_DIR.

    Files are selected by extension (see ``IMG_EXT``, case-insensitive).
    Each selected file is scored with ``is_blurry`` using ``BLUR_THRESHOLD``;
    a per-file line and a final summary are printed. Files that cannot be
    read are counted in the total but reported as skipped.
    """
    total = 0
    bad = 0
    # str.endswith accepts a tuple, so build it once outside the loop.
    extensions = tuple(IMG_EXT)

    for filename in os.listdir(SOURCE_DIR):
        if not filename.lower().endswith(extensions):
            continue

        img_path = os.path.join(SOURCE_DIR, filename)
        total += 1

        is_bad, score = is_blurry(img_path, BLUR_THRESHOLD)
        if score is None:
            # Unreadable image: there is no score to report or act on.
            print(f"[跳过] {filename} — 无法读取图片")
            continue

        if is_bad:
            bad += 1
            try:
                shutil.copy(img_path, os.path.join(OUTPUT_DIR, filename))
            except shutil.SameFileError:
                # Source and destination are the same file (OUTPUT_DIR
                # points at SOURCE_DIR); the image is already in place.
                pass
            print(f"[低质量] {filename} — 清晰度评分: {score:.2f}")
        else:
            print(f"[清晰] {filename} — 清晰度评分: {score:.2f}")

    print("\n============================")
    print(f"扫描总数: {total}")
    print(f"检测为模糊/马赛克: {bad}")
    print(f"已保存到: {OUTPUT_DIR}")
    print("============================\n")


if __name__ == "__main__":
    scan_and_filter()