2025-08-13 14:49:06 +08:00
|
|
|
|
import os
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
from PIL import Image
|
|
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def is_grayscale_image(image_path, saturation_threshold=0.05, gray_intensity_threshold=200):
    """
    Decide whether an image is "gray" (low saturation / close to grayscale).

    The image is converted to RGB and the HSV-style saturation
    ``(max - min) / max`` is computed per pixel. Only mid-brightness pixels
    take part in the average, so near-black and near-white areas (whose
    saturation is meaningless or noisy) do not skew the result.

    :param image_path: path of the image file to inspect
    :param saturation_threshold: saturation cut-off (0~1); the image is gray
        when its mean saturation is strictly below this value
    :param gray_intensity_threshold: upper brightness bound on the channel
        scale of the RGB data; pixels whose brightest channel is >= this
        value (or <= 10) are ignored
    :return: True when the image counts as gray and should be deleted. Also
        True for an empty image, for an image without any mid-brightness
        pixel (all black / all white), and for a file that cannot be read.
    """
    try:
        # Use the image as a context manager so the underlying file handle is
        # released as soon as the pixel data has been copied into numpy.
        with Image.open(image_path) as img:
            # Grayscale / palette / RGBA inputs are normalised to 3 channels.
            rgb_img = img if img.mode == 'RGB' else img.convert('RGB')
            rgb = np.asarray(rgb_img, dtype=np.float32)  # (H, W, 3)

        if rgb.size == 0:
            return True  # empty image

        # Manual HSV pieces: V is the brightest channel, S = (max - min) / max.
        max_c = rgb.max(axis=-1)
        min_c = rgb.min(axis=-1)

        # Skip very dark and very bright pixels.
        usable = (max_c > 10) & (max_c < gray_intensity_threshold)
        if not usable.any():
            return True  # entirely black or white

        # max_c > 10 on every selected pixel, so the division is always safe.
        saturation = (max_c[usable] - min_c[usable]) / max_c[usable]

        # A low mean saturation means the picture is gray.
        return bool(saturation.mean() < saturation_threshold)

    except Exception as e:
        # Deliberate best-effort: a file that cannot be decoded is reported as
        # gray, which means the caller will delete it as well.
        print(f"⚠️ 无法读取图像 {image_path}: {e}")
        return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def delete_gray_images(folder_path, extensions=None, dry_run=False):
    """
    Delete the gray images found directly inside a folder (non-recursive).

    Every file whose name ends with one of ``extensions`` (compared
    case-insensitively) is passed to ``is_grayscale_image``; files reported
    as gray are removed unless ``dry_run`` is set. Progress and a final
    summary are printed to stdout.

    :param folder_path: folder that contains the images
    :param extensions: file-name endings treated as images; defaults to
        ``['.jpg', '.jpeg', '.png', '.bmp', '.tiff']``
    :param dry_run: when True, only report what would be deleted
    :return: None
    """
    if extensions is None:
        extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']
    # str.endswith accepts a tuple; lower-casing both sides also catches
    # mixed-case names such as "photo.Jpg".
    wanted_endings = tuple(ext.lower() for ext in extensions)

    folder = Path(folder_path)
    if not folder.is_dir():
        print(f"❌ 文件夹不存在: {folder_path}")
        return

    # A single directory pass yields each file exactly once, even on
    # case-insensitive filesystems where "*.jpg" and "*.JPG" match the same
    # entry. Sorting keeps the processing order deterministic.
    image_files = sorted(
        entry
        for entry in folder.iterdir()
        if entry.is_file() and entry.name.lower().endswith(wanted_endings)
    )

    if not image_files:
        print(f"🔍 文件夹中没有找到图片: {folder_path}")
        return

    print(f"🔍 扫描到 {len(image_files)} 张图片...")
    gray_count = 0      # images classified as gray
    deleted_count = 0   # images actually removed from disk

    for img_path in image_files:
        if not is_grayscale_image(img_path):
            print(f"✅ 彩色图: {img_path.name} (保留)")
            continue

        gray_count += 1
        print(f"🗑️ 灰色图: {img_path.name}")
        if dry_run:
            continue

        try:
            img_path.unlink()
        except OSError as e:
            # A failed deletion is reported; the file counts as kept.
            print(f"❌ 删除失败 {img_path.name}: {e}")
        else:
            print(f"✅ 已删除: {img_path.name}")
            deleted_count += 1

    print("\n" + "=" * 50)
    if dry_run:
        # Nothing is deleted in a dry run, so report the gray images found.
        print(f"🧪 模拟完成,共发现 {gray_count} 张灰色图将被删除")
        kept_count = len(image_files) - gray_count
    else:
        print(f"✅ 删除完成!共删除 {deleted_count} 张灰色图片")
        kept_count = len(image_files) - deleted_count
    print(f"📁 保留图片数: {kept_count}")
    print("=" * 50)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ================== User configuration ==================
FOLDER_PATH = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/f10"  # change to your image folder
DRY_RUN = False  # set to True first for a trial run; switch back to False once the report looks right

# ================== Run ==================
if __name__ == "__main__":
    print("🚀 开始检测并删除灰色图片...")
    delete_gray_images(
        folder_path=FOLDER_PATH,
        dry_run=DRY_RUN,
    )
|