import os
from typing import Optional, Sequence

import cv2
from skimage.metrics import structural_similarity as ssim


def calculate_ssim(image_path1: str, image_path2: str) -> Optional[float]:
    """Compute the SSIM similarity between two image files.

    Both images are loaded with OpenCV, converted to grayscale and, when
    their shapes differ, resized to the smaller height and width of the
    two before being compared.

    :param image_path1: Path of the first image.
    :param image_path2: Path of the second image.
    :return: The SSIM score, or ``None`` when either image cannot be read
        or the SSIM computation fails (a message is printed in both cases).
    """
    img1 = cv2.imread(image_path1)
    img2 = cv2.imread(image_path2)

    # cv2.imread signals failure by returning None instead of raising.
    if img1 is None:
        print(f"❌ 无法读取图片1: {image_path1}")
        return None
    if img2 is None:
        print(f"❌ 无法读取图片2: {image_path2}")
        return None

    gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)

    # SSIM needs equally sized inputs; shrink both to the common minimum.
    if gray1.shape != gray2.shape:
        print("⚠️ 图像尺寸不一致,正在调整...")
        h = min(gray1.shape[0], gray2.shape[0])
        w = min(gray1.shape[1], gray2.shape[1])
        # cv2.resize expects the target size as (width, height).
        gray1 = cv2.resize(gray1, (w, h))
        gray2 = cv2.resize(gray2, (w, h))

    try:
        return ssim(gray1, gray2)
    except Exception as e:
        # Deliberately best-effort: one bad pair (e.g. an image too small
        # for the SSIM window) must not abort the whole folder scan.
        print(f"❌ SSIM 计算失败: {e}")
        return None


def delete_similar_consecutive_images(
    folder_path: str,
    threshold: float = 0.95,
    extensions: Optional[Sequence[str]] = None,
) -> None:
    """Delete images that are too similar to the preceding kept image.

    Images in ``folder_path`` are sorted by path and walked in order. Each
    image is compared with the most recent image that was NOT deleted; if
    the SSIM score is above ``threshold`` the image is removed from disk.
    Comparing against the last kept image (rather than the raw previous
    list entry) avoids using an already-deleted file as the reference.

    :param folder_path: Folder containing the images.
    :param threshold: SSIM threshold above which an image is deleted.
        Defaults to 0.95.
    :param extensions: File extensions to consider (matched
        case-insensitively). Defaults to ``['.jpg', '.jpeg', '.png']``.
    """
    if extensions is None:
        extensions = ['.jpg', '.jpeg', '.png']

    folder = os.path.abspath(folder_path)
    if not os.path.isdir(folder):
        print(f"❌ 文件夹不存在: {folder_path}")
        return

    # One directory pass; str.endswith accepts a tuple of suffixes, so a
    # file can never be collected twice even if extensions overlap.
    suffixes = tuple(ext.lower() for ext in extensions)
    image_files = sorted(
        path
        for path in (os.path.join(folder, name) for name in os.listdir(folder))
        if path.lower().endswith(suffixes) and os.path.isfile(path)
    )

    if not image_files:
        print(f"🔍 文件夹中没有找到图片: {folder_path}")
        return

    print(f"🔍 扫描到 {len(image_files)} 张图片...")

    deleted_count = 0
    # The reference is always a file that still exists on disk.
    reference = image_files[0]

    for candidate in image_files[1:]:
        similarity = calculate_ssim(reference, candidate)

        if similarity is not None and similarity > threshold:
            print(f"🗑️ 删除相似图片: {candidate} (SSIM: {similarity:.4f})")
            try:
                os.remove(candidate)
                deleted_count += 1
            except OSError as e:
                print(f"❌ 删除失败 {candidate}: {e}")
            # The reference stays unchanged: the candidate was a duplicate.
        else:
            print(f"✅ 保留图片: {candidate}")
            reference = candidate

    print("\n" + "=" * 50)
    print(f"✅ 删除完成!共删除 {deleted_count} 张相似图片")
    print(f"📁 保留图片数: {len(image_files) - deleted_count}")
    print("=" * 50)


# ================== User configuration ==================
FOLDER_PATH = "/media/hx/disk/folder_5"  # Change to your image folder path
THRESHOLD = 0.90  # SSIM threshold

# ================== Run ==================
if __name__ == "__main__":
    print("🚀 开始检测并删除相似图片...")
    delete_similar_consecutive_images(
        folder_path=FOLDER_PATH,
        threshold=THRESHOLD,
    )