# ailai_image_point_diff/detect_image/capture-image_2.py

import cv2
import time
import os
import numpy as np
from PIL import Image
from skimage.metrics import structural_similarity as ssim
from rknnlite.api import RKNNLite

# ================== Configuration ==================
# The stream address can be overridden through the RTSP_URL environment
# variable so camera credentials do not have to live in the source file; the
# literal below is only the fallback used when the variable is unset.
RTSP_URL = os.environ.get(
    "RTSP_URL",
    "rtsp://admin:ailaimiye123@192.168.0.234:554/streaming/channels/101",
)
RKNN_MODEL = "bag3568.rknn"           # model file passed to RKNNLite.load_rknn
OUTPUT_DIR = "camera_event_capture"   # root directory for capture sessions

# NOTE(review): CONF_THRESHOLD is not referenced anywhere else in this script
# as shown; detections are gated by OBJ_THRESH — confirm which one is intended.
CONF_THRESHOLD = 0.5
# While idle, frames whose SSIM against the last kept frame exceeds this value
# are skipped as duplicates.
SSIM_THRESHOLD = 0.9

END_MISS_FRAMES = 30        # consecutive frames without a detection that end a capture session
SAVE_EVERY_N_FRAMES = 1     # while capturing, save one frame out of every N
SHOW_WINDOW = False         # show a live preview window ("q" quits)

# Gray-frame filter parameters: a pixel is "gray" when every channel lies in
# [GRAY_LOWER, GRAY_UPPER]; a frame is dropped when the share of such pixels
# exceeds GRAY_RATIO_THRESHOLD.
GRAY_LOWER = 70
GRAY_UPPER = 230
GRAY_RATIO_THRESHOLD = 0.7

IMG_SIZE = (640, 640)       # model input size as (width, height)
OBJ_THRESH = 0.001          # minimum combined score for a candidate to count as a detection
# NOTE(review): NMS_THRESH and CLASS_NAME are not referenced elsewhere in this
# script as shown.
NMS_THRESH = 0.45
CLASS_NAME = ["bag"]

os.makedirs(OUTPUT_DIR, exist_ok=True)

# ================== Gray-frame check ==================
def is_large_gray(image):
    """Report whether most of *image* falls inside the configured gray band.

    A pixel counts as "gray" when all three of its channels lie within
    [GRAY_LOWER, GRAY_UPPER] (both ends inclusive). The image is flagged when
    the fraction of such pixels is strictly greater than
    GRAY_RATIO_THRESHOLD.

    Anything that does not convert to an (H, W, 3) array is also reported as
    True.
    """
    pixels = np.array(image)
    if not (pixels.ndim == 3 and pixels.shape[2] == 3):
        return True

    height, width = pixels.shape[:2]
    # Test every channel against the band at once, then require all three
    # channels of a pixel to pass.
    within_band = (pixels >= GRAY_LOWER) & (pixels <= GRAY_UPPER)
    gray_pixels = within_band.all(axis=2)
    return gray_pixels.sum() / (height * width) > GRAY_RATIO_THRESHOLD

# ================== RKNN inference helpers ==================
def letterbox_resize(image, size, bg_color=114):
    """Scale *image* to fit inside *size* and pad it to exactly that size.

    Args:
        image: array whose first two dimensions are (height, width). The
            result is pasted into a 3-channel canvas, so a 3-channel image
            is expected.
        size: target ``(width, height)``.
        bg_color: value used to fill the padding area.

    Returns:
        ``(canvas, scale, dx, dy)``: the padded uint8 image, the resize
        factor applied to both axes, and the left/top padding offsets.
    """
    out_w, out_h = size
    src_h, src_w = image.shape[:2]

    # One factor for both axes keeps the aspect ratio; taking the smaller
    # ratio guarantees the resized image fits inside the target.
    scale = min(out_w / src_w, out_h / src_h)
    fit_w = int(src_w * scale)
    fit_h = int(src_h * scale)
    shrunk = cv2.resize(image, (fit_w, fit_h))

    canvas = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)

    # Center the resized image; floor division puts any odd leftover pixel
    # of padding on the right/bottom.
    dx = (out_w - fit_w) // 2
    dy = (out_h - fit_h) // 2
    canvas[dy:dy + fit_h, dx:dx + fit_w] = shrunk
    return canvas, scale, dx, dy

def dfl_numpy(position):
    """Decode DFL-style box logits into expected per-side offsets.

    The channel axis of *position* is split into 4 groups (one per box side)
    of ``c // 4`` bins each. A softmax is taken over the bins of every group
    and the result is the expectation of the bin index under that
    distribution.

    Args:
        position: array of shape ``(n, c, h, w)`` with ``c`` divisible by 4.

    Returns:
        Array of shape ``(n, 4, h, w)`` holding the expected bin index for
        each of the four groups.
    """
    n, c, h, w = position.shape
    p_num = 4
    mc = c // p_num
    y = position.reshape(n, p_num, mc, h, w)

    # Integer inputs would wrap around in the subtraction below, so promote
    # them to float first.
    if not np.issubdtype(y.dtype, np.floating):
        y = y.astype(np.float64)

    # Subtracting the per-group maximum leaves the softmax unchanged but keeps
    # np.exp from overflowing to inf, which would turn the result into NaN.
    y = np.exp(y - np.max(y, axis=2, keepdims=True))
    y = y / np.sum(y, axis=2, keepdims=True)

    acc = np.arange(mc).reshape(1, 1, mc, 1, 1)
    return np.sum(y * acc, axis=2)

def box_process(position):
    """Turn one detection head's raw box output into corner-format boxes.

    The per-side distances decoded by dfl_numpy are subtracted from / added
    to each grid cell centre and scaled by the head's stride, giving
    ``(x1, y1, x2, y2)`` in pixels of the IMG_SIZE model input.

    Args:
        position: array of shape ``(n, c, grid_h, grid_w)`` as accepted by
            dfl_numpy.

    Returns:
        Array of shape ``(n, 4, grid_h, grid_w)``.
    """
    grid_h, grid_w = position.shape[2:4]
    col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
    col = col.reshape(1, 1, grid_h, grid_w)
    row = row.reshape(1, 1, grid_h, grid_w)
    # Channel 0 is the column index (x), channel 1 the row index (y).
    grid = np.concatenate((col, row), axis=1)

    # IMG_SIZE is (width, height). The stride channels must follow the same
    # (x, y) order as the grid; the two values only differ if the horizontal
    # and vertical strides differ.
    stride = np.array(
        [IMG_SIZE[0] // grid_w, IMG_SIZE[1] // grid_h]
    ).reshape(1, 2, 1, 1)

    position = dfl_numpy(position)
    # The +0.5 moves from the cell's corner to its centre.
    box_xy = grid + 0.5 - position[:, 0:2, :, :]
    box_xy2 = grid + 0.5 + position[:, 2:4, :, :]
    return np.concatenate((box_xy * stride, box_xy2 * stride), axis=1)

def filter_boxes(boxes, box_confidences, box_class_probs, obj_thresh=None):
    """Tell whether any candidate reaches the detection threshold.

    Each candidate's score is its confidence multiplied by its highest class
    probability; the function reports whether at least one score is greater
    than or equal to the threshold.

    Args:
        boxes: candidate boxes. Accepted for interface compatibility; the
            decision depends on the scores only.
        box_confidences: one confidence per candidate, any shape that
            flattens to ``(num_candidates,)``.
        box_class_probs: array of shape ``(num_candidates, num_classes)``.
        obj_thresh: score threshold; defaults to the module-level OBJ_THRESH.

    Returns:
        True if at least one candidate passes, otherwise False. Empty input
        yields False.
    """
    if obj_thresh is None:
        obj_thresh = OBJ_THRESH

    confidences = np.asarray(box_confidences).reshape(-1)
    class_probs = np.asarray(box_class_probs)
    if confidences.size == 0 or class_probs.size == 0:
        return False

    # Only the best class per candidate matters for a presence check.
    best_class_scores = class_probs.max(axis=-1)
    scores = confidences * best_class_scores
    return bool(np.any(scores >= obj_thresh))

def post_process(outputs, scale, dx, dy):
    """Reduce the raw model outputs to the flag returned by filter_boxes.

    *outputs* is read as three detection heads of three tensors each: for
    head ``i`` the tensor at index ``3*i`` is decoded by box_process, the one
    at ``3*i + 1`` is used as class probabilities and the one at ``3*i + 2``
    as box confidence. Every tensor is flattened to one row per grid cell and
    the heads are stacked before being handed to filter_boxes.

    ``scale``, ``dx`` and ``dy`` are accepted but not used here.
    """
    def to_rows(tensors):
        # (n, c, h, w) -> (n * h * w, c) per tensor, stacked in head order.
        return np.concatenate(
            [t.transpose(0, 2, 3, 1).reshape(-1, t.shape[1]) for t in tensors]
        )

    decoded, confidences, class_scores = [], [], []
    for head in range(3):
        first = 3 * head
        decoded.append(box_process(outputs[first]))
        confidences.append(outputs[first + 2])
        class_scores.append(outputs[first + 1])

    return filter_boxes(
        to_rows(decoded),
        to_rows(confidences),
        to_rows(class_scores),
    )

# ================== RKNN initialization ==================
# Load the model file and bring up the runtime once at startup. Both calls
# are treated as failed on any non-zero return value, which aborts the script.
rknn = RKNNLite()
if rknn.load_rknn(RKNN_MODEL) != 0:
    raise RuntimeError("RKNN 模型加载失败")
# NOTE(review): NPU_CORE_AUTO presumably lets the runtime pick the NPU core —
# confirm against the RKNN Toolkit Lite documentation.
if rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO) != 0:
    raise RuntimeError("RKNN Runtime 初始化失败")
print("✅ RKNN 初始化完成")

# ================== Video stream ==================
# Open the RTSP stream once at startup. If the camera cannot be reached,
# release the capture handle and the already-initialised RKNN runtime before
# aborting: the try/finally cleanup around the main loop is never reached on
# this path.
cap = cv2.VideoCapture(RTSP_URL)
if not cap.isOpened():
    cap.release()
    rknn.release()
    raise RuntimeError("RTSP 连接失败")
print("🎥 视频流已连接")

# ================== State machine ==================
# Two states: IDLE waits for a detection, CAPTURING saves frames until the
# target has been missing for END_MISS_FRAMES consecutive processed frames.
STATE_IDLE, STATE_CAPTURING = 0, 1

state = STATE_IDLE
session_id = 0        # incremented each time a capture session starts
session_dir = None    # directory of the active session, None while idle
save_idx = 0          # frames handled so far in the current session
miss_count = 0        # consecutive frames without a detection while capturing
last_gray = None      # grayscale copy of the last frame that passed the SSIM check
frame_count = 0       # frames successfully read from the stream

# Main capture loop: read frames, drop unusable or duplicate ones, run the
# detector and drive the IDLE/CAPTURING state machine until interrupted.
MAX_READ_FAILURES = 10   # consecutive failed reads before the stream is reopened
read_failures = 0
saved_count = 0          # frames actually written during the current session

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # A dropped RTSP connection makes read() fail indefinitely, so
            # after several consecutive failures reopen the stream instead of
            # sleeping forever.
            read_failures += 1
            time.sleep(0.5)
            if read_failures >= MAX_READ_FAILURES:
                print("⚠️ 视频流读取失败，正在重连")
                cap.release()
                cap = cv2.VideoCapture(RTSP_URL)
                read_failures = 0
            continue
        read_failures = 0

        frame_count += 1

        if SHOW_WINDOW:
            cv2.imshow("Camera", frame)
            if cv2.waitKey(1) == ord('q'):
                break

        # ---------- Gray-frame filter ----------
        # is_large_gray applies the same bounds to all three channels, so the
        # channel order is irrelevant and the BGR frame can be passed as-is
        # (no BGR->RGB conversion or PIL round trip needed).
        if is_large_gray(frame):
            continue

        # ---------- SSIM de-duplication ----------
        # Only while idle: skip frames that are nearly identical to the last
        # frame that got past this check.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        if last_gray is not None and state == STATE_IDLE:
            sim = ssim(gray, last_gray)
            if sim > SSIM_THRESHOLD:
                continue
        last_gray = gray.copy()

        # ---------- RKNN inference: is a bag present? ----------
        # NOTE(review): the frame is fed to the model in OpenCV's BGR channel
        # order — confirm this matches how the model was converted.
        img_resized, scale, dx, dy = letterbox_resize(frame, IMG_SIZE)
        input_data = np.expand_dims(img_resized, 0)
        outputs = rknn.inference(inputs=[input_data])
        if outputs is None:
            # Guard against a failed inference call; post_process would
            # otherwise crash on indexing None.
            print("⚠️ RKNN 推理失败，跳过该帧")
            continue
        has_bag = post_process(outputs, scale, dx, dy)

        # ---------- State machine ----------
        if state == STATE_IDLE:
            if has_bag:
                # The frame that triggers a session is not itself written;
                # saving starts with the next processed frame.
                session_id += 1
                ts = time.strftime("%Y%m%d_%H%M%S")
                session_dir = os.path.join(OUTPUT_DIR, f"session_{session_id:04d}_{ts}")
                os.makedirs(session_dir, exist_ok=True)
                print(f"\n🚀 进入采集")
                state = STATE_CAPTURING
                miss_count = 0
                save_idx = 0
                saved_count = 0

        elif state == STATE_CAPTURING:
            if has_bag:
                miss_count = 0
            else:
                miss_count += 1

            if save_idx % SAVE_EVERY_N_FRAMES == 0:
                # Take the clock once so the seconds and milliseconds in the
                # file name always belong to the same instant.
                now = time.time()
                ts = time.strftime("%Y%m%d_%H%M%S", time.localtime(now))
                ms = int((now % 1) * 1000)
                fname = f"{save_idx:06d}_{ts}_{ms:03d}.png"
                # Save the original, unscaled frame; imwrite reports failure
                # through its return value rather than raising.
                if cv2.imwrite(os.path.join(session_dir, fname), frame):
                    saved_count += 1
                else:
                    print(f"⚠️ 保存失败: {fname}")
            save_idx += 1

            if miss_count >= END_MISS_FRAMES:
                # Report frames actually written, which differs from save_idx
                # when SAVE_EVERY_N_FRAMES > 1 or a write failed.
                print(f"🛑 退出采集，本次保存 {saved_count} 帧")
                state = STATE_IDLE
                miss_count = 0
                session_dir = None

except KeyboardInterrupt:
    print("\n🛑 用户退出")

finally:
    cap.release()
    cv2.destroyAllWindows()
    rknn.release()
    print("程序结束")