103 lines
3.3 KiB
Python
103 lines
3.3 KiB
Python
import os
|
|
import cv2
|
|
from skimage.metrics import structural_similarity as ssim
|
|
|
|
def calculate_ssim(image_path1, image_path2):
    """Compute the SSIM similarity between two image files.

    Both files are loaded with OpenCV, converted to grayscale and, when
    their shapes differ, resized to the smaller height and smaller width of
    the pair before being handed to ``ssim``.

    :param image_path1: path of the first image
    :param image_path2: path of the second image
    :return: the value returned by ``ssim`` for the two grayscale images, or
        ``None`` when either file cannot be read or the SSIM call raises
    """
    # Load both files up front; a failed read is reported as None.
    first_bgr = cv2.imread(image_path1)
    second_bgr = cv2.imread(image_path2)

    if first_bgr is None:
        print(f"❌ 无法读取图片1: {image_path1}")
        return None
    if second_bgr is None:
        print(f"❌ 无法读取图片2: {image_path2}")
        return None

    # Compare on a single grayscale channel.
    first_gray, second_gray = (
        cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY) for bgr in (first_bgr, second_bgr)
    )

    # Bring both images to a common size before comparing.
    if first_gray.shape != second_gray.shape:
        print("⚠️ 图像尺寸不一致,正在调整...")
        target_h = min(first_gray.shape[0], second_gray.shape[0])
        target_w = min(first_gray.shape[1], second_gray.shape[1])
        target_size = (target_w, target_h)  # cv2.resize is given (width, height)
        first_gray = cv2.resize(first_gray, target_size)
        second_gray = cv2.resize(second_gray, target_size)

    # Any failure inside the SSIM computation is reported and mapped to None.
    try:
        return ssim(first_gray, second_gray)
    except Exception as err:
        print(f"❌ SSIM 计算失败: {err}")
        return None
|
|
|
|
|
|
def delete_similar_consecutive_images(folder_path, threshold=0.95, extensions=None):
    """Delete images that are near-duplicates of the image before them.

    Image files in ``folder_path`` are sorted by path and walked in order.
    Each image is compared, via ``calculate_ssim``, with the most recently
    retained image. When the score is above ``threshold`` the image is
    removed from disk; otherwise it is kept and becomes the new reference.
    Progress and a final summary are printed.

    :param folder_path: folder that contains the images
    :param threshold: SSIM threshold; images scoring above it are deleted
        (default 0.95)
    :param extensions: iterable of file suffixes to treat as images, matched
        case-insensitively; defaults to ['.jpg', '.jpeg', '.png']
    :return: None
    """
    if extensions is None:
        extensions = ['.jpg', '.jpeg', '.png']
    # File names are lower-cased before matching, so the suffixes must be too.
    suffixes = tuple(ext.lower() for ext in extensions)

    folder = os.path.abspath(folder_path)
    # isdir (not exists) so that a path to a regular file is rejected here
    # instead of making os.listdir raise.
    if not os.path.isdir(folder):
        print(f"❌ 文件夹不存在: {folder_path}")
        return

    # One pass with a tuple of suffixes: each file is listed at most once even
    # when the suffixes overlap. A duplicated entry would be compared with
    # itself and deleted. Directories with image-like names are skipped.
    image_files = sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if name.lower().endswith(suffixes)
        and os.path.isfile(os.path.join(folder, name))
    )

    if not image_files:
        print(f"🔍 文件夹中没有找到图片: {folder_path}")
        return

    print(f"🔍 扫描到 {len(image_files)} 张图片...")
    deleted_count = 0

    # Compare against the last image that is still on disk. Comparing with the
    # immediately preceding path would try to read a file that was just
    # deleted, and the image after every deletion would be kept unchecked.
    reference = image_files[0]
    for candidate in image_files[1:]:
        similarity = calculate_ssim(reference, candidate)
        if similarity is not None and similarity > threshold:
            print(f"🗑️ 删除相似图片: {candidate} (SSIM: {similarity:.4f})")
            try:
                os.remove(candidate)
            except OSError as e:
                print(f"❌ 删除失败 {candidate}: {e}")
            else:
                deleted_count += 1
                # The candidate is gone; keep comparing against the same reference.
                continue
        else:
            print(f"✅ 保留图片: {candidate}")
        # The candidate is still on disk, so it becomes the next reference.
        reference = candidate

    print("\n" + "=" * 50)
    print(f"✅ 删除完成!共删除 {deleted_count} 张相似图片")
    print(f"📁 保留图片数: {len(image_files) - deleted_count}")
    print("=" * 50)
|
|
|
|
|
|
# ================== User configuration ==================
# Defaults used when the script is run without command-line arguments.
FOLDER_PATH = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/f15"  # change to your image folder
THRESHOLD = 0.90  # SSIM threshold

# ================== Entry point ==================
if __name__ == "__main__":
    # Imported here because it is only needed when running as a script.
    import argparse

    # Optional overrides: the script deletes files, so the target folder and
    # threshold can be chosen per run without editing the constants above.
    parser = argparse.ArgumentParser(
        description="Delete consecutive images whose SSIM similarity exceeds a threshold."
    )
    parser.add_argument(
        "folder",
        nargs="?",
        default=FOLDER_PATH,
        help="image folder to scan (default: FOLDER_PATH)",
    )
    parser.add_argument(
        "-t",
        "--threshold",
        type=float,
        default=THRESHOLD,
        help="SSIM threshold above which an image is deleted (default: THRESHOLD)",
    )
    args = parser.parse_args()

    print("🚀 开始检测并删除相似图片...")
    delete_similar_consecutive_images(
        folder_path=args.folder,
        threshold=args.threshold,
    )