"""Event-triggered frame capture from an RTSP camera, gated by an RKNN detector.

The script reads frames from an RTSP stream, discards frames that are mostly
gray or (while idle) nearly identical to the previous frame, runs an RKNN
model on the remainder and, as long as the model reports a "bag", saves the
original frames into a per-session directory under ``OUTPUT_DIR``.
"""
import os
import time

import cv2
import numpy as np
from PIL import Image  # noqa: F401  (no longer used by the main loop; import retained)
from skimage.metrics import structural_similarity as ssim
from rknnlite.api import RKNNLite

# ================== Configuration ==================
# NOTE(review): the default URL embeds camera credentials in source. Set the
# RTSP_URL environment variable to override it and consider removing the
# credentials from the repository.
RTSP_URL = os.environ.get(
    "RTSP_URL",
    "rtsp://admin:ailaimiye123@192.168.0.234:554/streaming/channels/101",
)
RKNN_MODEL = "bag3568.rknn"
OUTPUT_DIR = "camera_event_capture"

# NOTE(review): CONF_THRESHOLD is not referenced anywhere in the visible code;
# the effective detection threshold is OBJ_THRESH below.
CONF_THRESHOLD = 0.5
SSIM_THRESHOLD = 0.9
END_MISS_FRAMES = 30       # consecutive frames without a detection -> end the capture session
SAVE_EVERY_N_FRAMES = 1    # while capturing, save one frame out of every N
SHOW_WINDOW = False

# How long to wait after a failed read, and how many consecutive failed reads
# are tolerated before the stream is reopened.
READ_RETRY_DELAY_S = 0.5
RECONNECT_AFTER_FAILED_READS = 10

# Gray-frame filter parameters
GRAY_LOWER = 70
GRAY_UPPER = 230
GRAY_RATIO_THRESHOLD = 0.7

IMG_SIZE = (640, 640)      # (width, height) of the model input
OBJ_THRESH = 0.001
NMS_THRESH = 0.45
CLASS_NAME = ["bag"]

os.makedirs(OUTPUT_DIR, exist_ok=True)


# ================== Gray-frame filter ==================
def is_large_gray(image):
    """Return True when the image should be skipped as "mostly gray".

    A pixel counts as gray when all three channel values lie in
    ``[GRAY_LOWER, GRAY_UPPER]``. The same bounds are applied to every
    channel, so the result does not depend on channel order (RGB vs BGR).

    Args:
        image: Anything ``np.asarray`` can convert (ndarray or PIL image).

    Returns:
        True if the image is not a non-empty 3-channel array, or if the
        fraction of gray pixels exceeds ``GRAY_RATIO_THRESHOLD``.
    """
    img = np.asarray(image)
    if img.ndim != 3 or img.shape[2] != 3:
        return True
    h, w = img.shape[:2]
    if h * w == 0:
        # An empty frame carries no information; treat it like an unsupported one.
        return True
    gray_mask = ((img >= GRAY_LOWER) & (img <= GRAY_UPPER)).all(axis=2)
    return gray_mask.sum() / (h * w) > GRAY_RATIO_THRESHOLD


# ================== RKNN inference helpers ==================
def letterbox_resize(image, size, bg_color=114):
    """Resize ``image`` to fit inside ``size`` keeping aspect ratio, padding the rest.

    Args:
        image: Source image as an (H, W, 3) array.
        size: Target ``(width, height)``.
        bg_color: Fill value for the padding.

    Returns:
        ``(canvas, scale, dx, dy)``: the padded uint8 image, the scale factor
        applied to the source, and the left/top offsets of the resized image
        inside the canvas.
    """
    target_w, target_h = size
    h, w = image.shape[:2]
    scale = min(target_w / w, target_h / h)
    # Clamp to 1 px so extreme aspect ratios never request a zero-sized resize.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    resized = cv2.resize(image, (new_w, new_h))
    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
    dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2
    canvas[dy:dy + new_h, dx:dx + new_w] = resized
    return canvas, scale, dx, dy


def dfl_numpy(position):
    """Decode distribution-style box outputs into four expected distances.

    Args:
        position: Array of shape ``(n, 4 * mc, h, w)``.

    Returns:
        Array of shape ``(n, 4, h, w)``: for each of the four groups, the
        softmax-weighted mean of the bin indices ``0 .. mc-1``.
    """
    n, c, h, w = position.shape
    p_num = 4
    mc = c // p_num
    y = position.reshape(n, p_num, mc, h, w)
    # Subtract the per-distribution maximum before exponentiating: the softmax
    # value is unchanged, but large logits can no longer overflow to inf/nan.
    y = y - y.max(axis=2, keepdims=True)
    e = np.exp(y)
    y = e / e.sum(axis=2, keepdims=True)
    acc = np.arange(mc).reshape(1, 1, mc, 1, 1)
    return np.sum(y * acc, axis=2)


def box_process(position):
    """Convert one output branch into ``(x1, y1, x2, y2)`` boxes in model-input pixels.

    Args:
        position: Array of shape ``(n, 4 * mc, grid_h, grid_w)``.

    Returns:
        Array of shape ``(n, 4, grid_h, grid_w)``.
    """
    grid_h, grid_w = position.shape[2:4]
    col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
    col = col.reshape(1, 1, grid_h, grid_w)
    row = row.reshape(1, 1, grid_h, grid_w)
    grid = np.concatenate((col, row), axis=1)  # channel 0 = x, channel 1 = y
    # Stride is listed in the same (x, y) order as the grid. IMG_SIZE is
    # (width, height); for the square input both entries are equal.
    stride = np.array(
        [IMG_SIZE[0] // grid_w, IMG_SIZE[1] // grid_h]
    ).reshape(1, 2, 1, 1)
    position = dfl_numpy(position)
    box_xy = grid + 0.5 - position[:, 0:2, :, :]
    box_xy2 = grid + 0.5 + position[:, 2:4, :, :]
    return np.concatenate((box_xy * stride, box_xy2 * stride), axis=1)


def filter_boxes(boxes, box_confidences, box_class_probs):
    """Report whether any candidate scores at or above ``OBJ_THRESH``.

    Args:
        boxes: Candidate boxes. Accepted for interface compatibility; the
            presence check does not use them.
        box_confidences: Per-candidate confidence, flattened to 1-D.
        box_class_probs: Per-candidate class probabilities, shape ``(N, classes)``.

    Returns:
        True if at least one candidate has
        ``confidence * best_class_probability >= OBJ_THRESH``, else False.
    """
    box_confidences = box_confidences.reshape(-1)
    class_scores = np.asarray(box_class_probs).max(axis=-1)
    scores = box_confidences * class_scores
    return bool(np.any(scores >= OBJ_THRESH))  # True: bag present, False: no bag


def post_process(outputs, scale, dx, dy):
    """Reduce raw model outputs to a single "bag present" boolean.

    The outputs are read as three branches of three tensors each: index
    ``3*i`` is decoded as boxes, ``3*i + 1`` as class probabilities and
    ``3*i + 2`` as confidences.

    Args:
        outputs: Sequence of at least nine NCHW arrays from the model.
        scale, dx, dy: Letterbox parameters. Currently unused because no box
            is mapped back to the original frame.

    Returns:
        True if a bag is detected, else False.
    """
    boxes_list, conf_list, class_list = [], [], []
    for i in range(3):
        boxes_list.append(box_process(outputs[i * 3]))
        conf_list.append(outputs[i * 3 + 2])
        class_list.append(outputs[i * 3 + 1])

    def flatten(x):
        # NCHW -> (N*H*W, C)
        x = x.transpose(0, 2, 3, 1)
        return x.reshape(-1, x.shape[3])

    boxes = np.concatenate([flatten(b) for b in boxes_list])
    box_conf = np.concatenate([flatten(c) for c in conf_list])
    class_probs = np.concatenate([flatten(c) for c in class_list])
    return filter_boxes(boxes, box_conf, class_probs)


# ================== RKNN initialisation ==================
rknn = RKNNLite()
if rknn.load_rknn(RKNN_MODEL) != 0:
    raise RuntimeError("RKNN 模型加载失败")
if rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO) != 0:
    raise RuntimeError("RKNN Runtime 初始化失败")
print("✅ RKNN 初始化完成")

# ================== Video stream ==================
cap = cv2.VideoCapture(RTSP_URL)
if not cap.isOpened():
    # Do not leave the NPU runtime allocated when the camera is unreachable.
    rknn.release()
    raise RuntimeError("RTSP 连接失败")
print("🎥 视频流已连接")

# ================== State machine ==================
STATE_IDLE = 0
STATE_CAPTURING = 1

state = STATE_IDLE
miss_count = 0        # consecutive inferred frames without a detection
save_idx = 0          # index of the current frame within the session
saved_count = 0       # frames actually written to disk in the session
session_dir = None
session_id = 0

last_gray = None
frame_count = 0
failed_reads = 0      # consecutive failed cap.read() calls

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            failed_reads += 1
            if failed_reads >= RECONNECT_AFTER_FAILED_READS:
                # A dropped RTSP session does not recover by itself; reopen it.
                print("⚠️ 视频流读取失败,尝试重连")
                cap.release()
                cap = cv2.VideoCapture(RTSP_URL)
                failed_reads = 0
            time.sleep(READ_RETRY_DELAY_S)
            continue
        failed_reads = 0

        frame_count += 1

        if SHOW_WINDOW:
            cv2.imshow("Camera", frame)
            if cv2.waitKey(1) == ord('q'):
                break

        # ---------- Gray-frame filter ----------
        # The BGR frame is passed directly: the filter applies the same bounds
        # to every channel, so no colour conversion is needed.
        if is_large_gray(frame):
            continue

        # ---------- SSIM de-duplication (only while idle) ----------
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        if last_gray is not None and state == STATE_IDLE:
            sim = ssim(gray, last_gray)
            if sim > SSIM_THRESHOLD:
                continue
        last_gray = gray.copy()

        # ---------- RKNN inference: is there a bag? ----------
        # NOTE(review): the frame is fed to the model in BGR order - confirm
        # this matches the channel order the model was converted with.
        img_resized, scale, dx, dy = letterbox_resize(frame, IMG_SIZE)
        input_data = np.expand_dims(img_resized, 0)
        outputs = rknn.inference(inputs=[input_data])
        if outputs is None:
            # Skip the frame rather than crash inside post_process.
            print("⚠️ RKNN 推理失败,跳过该帧")
            continue
        has_bag = post_process(outputs, scale, dx, dy)

        # ---------- State machine ----------
        if state == STATE_IDLE:
            if has_bag:
                session_id += 1
                ts = time.strftime("%Y%m%d_%H%M%S")
                session_dir = os.path.join(
                    OUTPUT_DIR, f"session_{session_id:04d}_{ts}"
                )
                os.makedirs(session_dir, exist_ok=True)

                print("\n🚀 进入采集")
                state = STATE_CAPTURING
                miss_count = 0
                save_idx = 0
                saved_count = 0
                # NOTE(review): the frame that triggers the session is not
                # saved; saving starts with the next processed frame.

        elif state == STATE_CAPTURING:
            if has_bag:
                miss_count = 0
            else:
                miss_count += 1

            if save_idx % SAVE_EVERY_N_FRAMES == 0:
                # Read the clock once so the seconds and milliseconds parts of
                # the file name always describe the same instant.
                now = time.time()
                ts = time.strftime("%Y%m%d_%H%M%S", time.localtime(now))
                ms = int((now % 1) * 1000)
                fname = f"{save_idx:06d}_{ts}_{ms:03d}.png"
                out_path = os.path.join(session_dir, fname)
                if cv2.imwrite(out_path, frame):  # save the original frame
                    saved_count += 1
                else:
                    print(f"⚠️ 图像保存失败: {out_path}")

            save_idx += 1

            if miss_count >= END_MISS_FRAMES:
                print(f"🛑 退出采集,本次保存 {saved_count} 帧")
                state = STATE_IDLE
                miss_count = 0
                session_dir = None

except KeyboardInterrupt:
    print("\n🛑 用户退出")

finally:
    cap.release()
    if SHOW_WINDOW:
        # Only touch the GUI when a window was opened, so a GUI error cannot
        # prevent the NPU runtime from being released below.
        cv2.destroyAllWindows()
    rknn.release()
    print("程序结束")