Files
zjsh_yolov11/image/del_photo/del_image_gray.py
2025-09-01 14:14:18 +08:00

118 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
from pathlib import Path
from PIL import Image
import numpy as np
def is_grayscale_image(image_path, saturation_threshold=0.05, gray_intensity_threshold=200):
    """
    Decide whether an image is a "gray" picture (low average saturation).

    The image is converted to RGB, a per-pixel saturation ``(max - min) / max``
    is computed over the three channels, and the mean saturation of the
    mid-brightness pixels is compared against ``saturation_threshold``.

    :param image_path: path of the image file to inspect
    :param saturation_threshold: mean-saturation cutoff (saturation is in 0..1);
        images whose mean saturation is below it are reported as gray
    :param gray_intensity_threshold: pixels whose brightest channel is at or
        above this value are ignored (as are pixels at or below 10), so that
        near-white and near-black areas do not influence the mean
    :return: True if the image is considered gray (i.e. a deletion candidate).
        True is also returned for an empty image, for an image with no
        mid-brightness pixels, and when the file cannot be read.
    """
    try:
        # Image.open keeps the underlying file open until the image is closed;
        # the context manager releases the handle as soon as the pixel data has
        # been copied into the array, which matters when scanning many files.
        with Image.open(image_path) as img:
            rgb_img = img if img.mode == 'RGB' else img.convert('RGB')
            rgb = np.array(rgb_img).astype(np.float32)  # (H, W, 3)

        height, width, _ = rgb.shape
        if height * width == 0:
            return True  # empty image

        # Saturation is computed by hand instead of relying on PIL's HSV mode.
        r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2]
        max_c = np.maximum(np.maximum(r, g), b)
        min_c = np.minimum(np.minimum(r, g), b)
        delta = max_c - min_c

        # S = delta / max; pure-black pixels (max == 0) get S = 0. The division
        # is still evaluated for those pixels, hence the suppressed warnings.
        with np.errstate(divide='ignore', invalid='ignore'):
            s = np.where(max_c == 0, 0, delta / max_c)

        # Keep only mid-brightness pixels: very dark and very bright pixels
        # would otherwise distort the average.
        valid_s = s[(max_c > 10) & (max_c < gray_intensity_threshold)]
        if valid_s.size == 0:
            return True  # entirely (near-)black or (near-)white

        # Low mean saturation means the picture is effectively gray.
        return bool(valid_s.mean() < saturation_threshold)
    except Exception as e:
        print(f"⚠️ 无法读取图像 {image_path}: {e}")
        # NOTE: deliberate (and destructive) policy kept from the original:
        # images that cannot be read are reported as gray, so the caller
        # will delete them as well.
        return True
def delete_gray_images(folder_path, extensions=None, dry_run=False):
    """
    Delete the gray pictures found directly inside a folder.

    Every regular file in ``folder_path`` (non-recursive) whose name ends with
    one of ``extensions`` (compared case-insensitively) is passed to
    ``is_grayscale_image``; files reported as gray are removed unless
    ``dry_run`` is set. Progress and a final summary are printed.

    :param folder_path: path of the folder containing the images
    :param extensions: file-name endings to treat as images, including the
        leading dot; defaults to jpg / jpeg / png / bmp / tiff
    :param dry_run: if True, only report what would be deleted
    :return: None
    """
    if extensions is None:
        extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']

    folder = Path(folder_path)
    if not folder.exists():
        print(f"❌ 文件夹不存在: {folder_path}")
        return

    # One case-insensitive pass over the directory. Globbing for both the lower
    # and upper-case pattern lists every file twice on case-insensitive
    # filesystems and misses mixed-case suffixes such as ".Jpg".
    suffixes = tuple(ext.lower() for ext in extensions)
    entries = folder.iterdir() if folder.is_dir() else ()
    image_files = sorted(
        entry for entry in entries
        if entry.is_file() and entry.name.lower().endswith(suffixes)
    )

    if not image_files:
        print(f"🔍 文件夹中没有找到图片: {folder_path}")
        return

    print(f"🔍 扫描到 {len(image_files)} 张图片...")

    gray_count = 0     # images classified as gray (deleted or not)
    deleted_count = 0  # images actually removed from disk
    for img_path in image_files:
        if not is_grayscale_image(img_path):
            print(f"✅ 彩色图: {img_path.name} (保留)")
            continue

        gray_count += 1
        print(f"🗑️ 灰色图: {img_path.name}")
        if dry_run:
            continue

        try:
            img_path.unlink()
        except OSError as e:
            print(f"❌ 删除失败 {img_path.name}: {e}")
        else:
            print(f"✅ 已删除: {img_path.name}")
            deleted_count += 1

    print("\n" + "=" * 50)
    if dry_run:
        # Report the number of gray images found; nothing is deleted in a dry
        # run, so the deletion counter would always read 0 here.
        removed = gray_count
        print(f"🧪 模拟完成,共发现 {removed} 张灰色图将被删除")
    else:
        removed = deleted_count
        print(f"✅ 删除完成!共删除 {removed} 张灰色图片")
    print(f"📁 保留图片数: {len(image_files) - removed}")
    print("=" * 50)
# ================== User configuration ==================
# Folder containing the images to scan; change this to your own image folder.
FOLDER_PATH = "/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/f15"
# Set to True for a trial run first; switch to False only once the report looks right,
# because a real run permanently deletes files.
DRY_RUN = False

# ================== Entry point ==================
if __name__ == "__main__":
    print("🚀 开始检测并删除灰色图片...")
    delete_gray_images(
        folder_path=FOLDER_PATH,
        dry_run=DRY_RUN,
    )