# Files
# zjsh_yolov11/image/del_photo/del_image_ssim.py
# 2025-09-01 14:14:18 +08:00
#
# 103 lines
# 3.3 KiB
# Python

import os
import cv2
from skimage.metrics import structural_similarity as ssim
def calculate_ssim(image_path1, image_path2):
    """Compute the SSIM similarity between two image files.

    Both files are loaded with ``cv2.imread``, converted to grayscale and, if
    their shapes differ, both resized to the smaller height and the smaller
    width of the pair before the score is computed.

    :param image_path1: Path of the first image.
    :param image_path2: Path of the second image.
    :return: The value returned by ``ssim`` for the two grayscale images, or
        ``None`` when either file could not be read or the SSIM computation
        raised an exception (a message is printed in both cases).
    """
    paths = (image_path1, image_path2)
    # Read both files before checking either one; a failed read shows up as None.
    loaded = [cv2.imread(path) for path in paths]

    for index, (path, image) in enumerate(zip(paths, loaded), start=1):
        if image is None:
            print(f"❌ 无法读取图片{index}: {path}")
            return None

    # SSIM is computed on single-channel (grayscale) data.
    first, second = [cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) for image in loaded]

    # Bring both images to a common size: the smaller height and smaller width.
    if first.shape != second.shape:
        print("⚠️ 图像尺寸不一致,正在调整...")
        target_h = min(first.shape[0], second.shape[0])
        target_w = min(first.shape[1], second.shape[1])
        # cv2.resize takes the target size as (width, height).
        first = cv2.resize(first, (target_w, target_h))
        second = cv2.resize(second, (target_w, target_h))

    try:
        return ssim(first, second)
    except Exception as err:
        # Best effort: report the failure and let the caller treat it as "no score".
        print(f"❌ SSIM 计算失败: {err}")
        return None
def delete_similar_consecutive_images(folder_path, threshold=0.95, extensions=None):
"""
删除相似度高于阈值的连续图片
:param folder_path: 图片所在的文件夹路径
:param threshold: SSIM 阈值,默认为 0.95
:param extensions: 支持的图片格式列表,默认为 ['.jpg', '.jpeg', '.png']
"""
if extensions is None:
extensions = ['.jpg', '.jpeg', '.png']
folder = os.path.abspath(folder_path)
if not os.path.exists(folder):
print(f"❌ 文件夹不存在: {folder_path}")
return
# 获取所有图片文件路径
image_files = []
for ext in extensions:
image_files.extend([os.path.join(folder, f) for f in os.listdir(folder) if f.lower().endswith(ext)])
if not image_files:
print(f"🔍 文件夹中没有找到图片: {folder_path}")
return
# 按文件名排序以确保顺序正确
image_files.sort()
print(f"🔍 扫描到 {len(image_files)} 张图片...")
deleted_count = 0
# 遍历每一对连续的图片
for i in range(len(image_files) - 1):
img_path1 = image_files[i]
img_path2 = image_files[i + 1]
similarity = calculate_ssim(img_path1, img_path2)
if similarity is not None and similarity > threshold:
print(f"🗑️ 删除相似图片: {img_path2} (SSIM: {similarity:.4f})")
try:
os.remove(img_path2)
deleted_count += 1
except Exception as e:
print(f"❌ 删除失败 {img_path2}: {e}")
else:
print(f"✅ 保留图片: {img_path2}")
print("\n" + "=" * 50)
print(f"✅ 删除完成!共删除 {deleted_count} 张相似图片")
print(f"📁 保留图片数: {len(image_files) - deleted_count}")
print("=" * 50)
# ================== User configuration ==================
# Folder that holds the images to scan; change this to your own image folder.
FOLDER_PATH = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/f15"
# SSIM threshold passed to delete_similar_consecutive_images.
THRESHOLD = 0.90

# ================== Run ==================
if __name__ == "__main__":
    print("🚀 开始检测并删除相似图片...")
    delete_similar_consecutive_images(folder_path=FOLDER_PATH, threshold=THRESHOLD)