更新料带目标检测，判断料带到位逻辑

2025-12-28 00:12:46 +08:00
parent 72b5052d2e
commit d6918e90f2
16 changed files with 1398 additions and 2 deletions
--- a/detect_image/1.jpg
+++ b/detect_image/1.jpg
--- a/detect_image/2.jpg
+++ b/detect_image/2.jpg
--- a/detect_image/3.jpg
+++ b/detect_image/3.jpg
--- a/detect_image/bag3568.rknn
+++ b/detect_image/bag3568.rknn
--- a/detect_image/bag3588.rknn
+++ b/detect_image/bag3588.rknn
--- a/detect_image/bag_detect.py
+++ b/detect_image/bag_detect.py
@ -0,0 +1,166 @@
+import os
+import cv2
+import numpy as np
+from rknnlite.api import RKNNLite
+
+# ====================== Configuration ======================
+MODEL_PATH = "bag3588.rknn"      # path to the RKNN model
+IMG_PATH = "2.jpg"     # path of the image to run inference on
+IMG_SIZE = (640, 640)           # model input size (w, h)
+OBJ_THRESH = 0.001              # minimum score for a candidate box to be kept
+NMS_THRESH = 0.45               # NMS overlap threshold
+CLASS_NAME = ["bag"]
+OUTPUT_DIR = "./result"
+# The output directory is created as soon as the module is loaded.
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# ====================== 工具函数 ======================
+def letterbox_resize(image, size, bg_color=114):
+    """Scale `image` to fit inside `size` keeping its aspect ratio, padding the rest.
+
+    Args:
+        image: Image array; only its first two shape entries (height, width) are read.
+        size: Target ``(width, height)``.
+        bg_color: Fill value used for the padded border.
+
+    Returns:
+        ``(canvas, scale, dx, dy)``: the padded uint8 canvas of shape
+        (height, width, 3), the resize factor, and the left/top offsets of
+        the resized content inside the canvas.
+    """
+    out_w, out_h = size
+    src_h, src_w = image.shape[:2]
+    # Largest factor that keeps both sides within the target.
+    scale = min(out_w / src_w, out_h / src_h)
+    fit_w = int(src_w * scale)
+    fit_h = int(src_h * scale)
+    # Centre the content; an odd leftover pixel ends up on the right/bottom.
+    dx = (out_w - fit_w) // 2
+    dy = (out_h - fit_h) // 2
+    shrunk = cv2.resize(image, (fit_w, fit_h))
+    canvas = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)
+    canvas[dy:dy + fit_h, dx:dx + fit_w] = shrunk
+    return canvas, scale, dx, dy
+
+def dfl_numpy(position):
+    """Turn per-side bin logits into expected bin indices (softmax expectation).
+
+    Args:
+        position: Array of shape (n, c, h, w). The channel axis is split into
+            4 groups of ``c // 4`` bins each.
+
+    Returns:
+        Array of shape (n, 4, h, w): for every group, the softmax-weighted
+        mean of the bin indices ``0 .. c // 4 - 1``.
+    """
+    n, c, h, w = position.shape
+    p_num = 4
+    mc = c // p_num
+    logits = position.reshape(n, p_num, mc, h, w)
+    # Subtracting the per-group maximum leaves the softmax unchanged but keeps
+    # np.exp from overflowing to inf (and the ratio from becoming NaN).
+    exp = np.exp(logits - logits.max(axis=2, keepdims=True))
+    probs = exp / exp.sum(axis=2, keepdims=True)
+    bins = np.arange(mc).reshape(1, 1, mc, 1, 1)
+    return np.sum(probs * bins, axis=2)
+
+def box_process(position):
+    """Convert one head's raw box output into corner boxes in model-input pixels.
+
+    Args:
+        position: Array of shape (n, c, grid_h, grid_w); decoded with `dfl_numpy`.
+
+    Returns:
+        Array of shape (n, 4, grid_h, grid_w) holding (x1, y1, x2, y2).
+    """
+    grid_h, grid_w = position.shape[2:4]
+    xs, ys = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
+    # Channel 0 is the column index (x), channel 1 the row index (y).
+    cells = np.stack((xs, ys), axis=0)[np.newaxis]
+    stride = np.array([IMG_SIZE[1] // grid_h, IMG_SIZE[0] // grid_w]).reshape(1, 2, 1, 1)
+    dist = dfl_numpy(position)
+    # Distances are measured from the cell centre, in grid units.
+    centers = cells + 0.5
+    top_left = (centers - dist[:, 0:2, :, :]) * stride
+    bottom_right = (centers + dist[:, 2:4, :, :]) * stride
+    return np.concatenate((top_left, bottom_right), axis=1)
+
+def filter_boxes(boxes, box_confidences, box_class_probs):
+    """Threshold candidate boxes by score and suppress overlapping ones.
+
+    Args:
+        boxes: Candidate boxes, reshaped to (-1, 4) as (x1, y1, x2, y2).
+        box_confidences: Per-candidate confidence, flattened to 1-D.
+        box_class_probs: Per-candidate class probabilities, one row per candidate.
+
+    Returns:
+        ``(boxes, classes, scores, conf_keep)`` for the surviving candidates,
+        ordered by descending score, or four ``None`` values when no candidate
+        reaches ``OBJ_THRESH``.
+    """
+    boxes = np.array(boxes).reshape(-1, 4)
+    box_confidences = np.array(box_confidences).reshape(-1)
+    box_class_probs = np.array(box_class_probs)
+    
+    # Score = confidence * probability of the best class.
+    class_ids = np.argmax(box_class_probs, axis=-1)
+    class_scores = box_class_probs[np.arange(len(class_ids)), class_ids]
+    scores = box_confidences * class_scores
+
+    mask = scores >= OBJ_THRESH
+    if np.sum(mask) == 0:
+        return None, None, None, None
+
+    boxes = boxes[mask]
+    classes = class_ids[mask]
+    scores = scores[mask]
+    conf_keep = box_confidences[mask]   # raw confidence, before multiplying by the class score
+
+    # Greedy NMS across all classes: keep the best box, drop every remaining
+    # box whose IoU with it exceeds NMS_THRESH, repeat on what is left.
+    x1, y1, x2, y2 = boxes[:,0], boxes[:,1], boxes[:,2], boxes[:,3]
+    # Widths/heights use the inclusive-pixel "+1" convention.
+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    order = scores.argsort()[::-1]
+    keep = []
+    while order.size > 0:
+        i = order[0]
+        keep.append(i)
+        xx1 = np.maximum(x1[i], x1[order[1:]])
+        yy1 = np.maximum(y1[i], y1[order[1:]])
+        xx2 = np.minimum(x2[i], x2[order[1:]])
+        yy2 = np.minimum(y2[i], y2[order[1:]])
+        w = np.maximum(0, xx2 - xx1 + 1)
+        h = np.maximum(0, yy2 - yy1 + 1)
+        inter = w * h
+        ovr = inter / (areas[i] + areas[order[1:]] - inter)
+        inds = np.where(ovr <= NMS_THRESH)[0]
+        # inds indexes order[1:], hence the +1 to map back into order.
+        order = order[inds + 1]
+    return boxes[keep], classes[keep], scores[keep], conf_keep[keep]
+
+def post_process(outputs, scale, dx, dy):
+    """Decode raw model outputs into boxes in original-image coordinates.
+
+    Args:
+        outputs: Sequence read at indices 0..8 as three consecutive
+            (box, class, confidence) triples.
+        scale: Letterbox resize factor.
+        dx: Horizontal letterbox padding in pixels.
+        dy: Vertical letterbox padding in pixels.
+
+    Returns:
+        ``(boxes, classes, scores, conf_keep)``, or four ``None`` values when
+        `filter_boxes` keeps nothing. `scores` is returned as ``1 - score``
+        and `conf_keep` is multiplied by 255.
+    """
+    def to_rows(t):
+        # (n, c, h, w) -> (n*h*w, c)
+        return t.transpose(0, 2, 3, 1).reshape(-1, t.shape[1])
+
+    heads = range(0, 9, 3)
+    boxes = np.concatenate([to_rows(box_process(outputs[k])) for k in heads])
+    box_conf = np.concatenate([to_rows(outputs[k + 2]) for k in heads])
+    class_probs = np.concatenate([to_rows(outputs[k + 1]) for k in heads])
+
+    boxes, classes, scores, conf_keep = filter_boxes(boxes, box_conf, class_probs)
+    if boxes is None:
+        return None, None, None, None
+
+    # Undo the letterbox: remove the padding, then rescale to the source image.
+    boxes[:, 0::2] -= dx
+    boxes[:, 1::2] -= dy
+    boxes /= scale
+    boxes = boxes.clip(min=0)
+
+    # Confidence is scaled by 255; the score is inverted.
+    return boxes, classes, 1 - scores, conf_keep * 255
+
+# ====================== 单张图片推理 ======================
+def detect_single_image(img_path):
+    """Run detection on one image file and save an annotated copy to OUTPUT_DIR.
+
+    When detections exist, the saved file name gets a ``_conf_<v1>_<v2>...``
+    suffix built from the scaled confidence values; otherwise the original
+    file name is reused.
+
+    Args:
+        img_path: Path of the image to process.
+
+    Raises:
+        FileNotFoundError: If the image cannot be read.
+        RuntimeError: If loading the model or starting the runtime reports a
+            non-zero return code.
+    """
+    img_name = os.path.basename(img_path)
+    # Read the image first so a bad path never touches the NPU runtime.
+    img = cv2.imread(img_path)
+    if img is None:
+        raise FileNotFoundError(f"图片无法读取: {img_path}")
+
+    rknn = RKNNLite(verbose=False)
+    try:
+        if rknn.load_rknn(MODEL_PATH) != 0:
+            raise RuntimeError("RKNN 模型加载失败")
+        if rknn.init_runtime() != 0:
+            raise RuntimeError("RKNN Runtime 初始化失败")
+
+        img_resized, scale, dx, dy = letterbox_resize(img, IMG_SIZE)
+        input_data = np.expand_dims(img_resized, 0)
+        outputs = rknn.inference(inputs=[input_data])
+    finally:
+        # Release the runtime on every path, including failures above.
+        rknn.release()
+
+    boxes, classes, scores, conf_keep = post_process(outputs, scale, dx, dy)
+
+    if boxes is not None:
+        for box, cls_id, score in zip(boxes, classes, scores):
+            x1, y1, x2, y2 = box.astype(int)
+            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
+            cv2.putText(img,
+                        f"{CLASS_NAME[cls_id]}:{score:.1f}",
+                        (x1, max(y1-5,0)),
+                        cv2.FONT_HERSHEY_SIMPLEX,
+                        0.6,
+                        (0, 255, 0),
+                        2)
+
+    # Build the output file name and save the (possibly annotated) image.
+    if conf_keep is not None and len(conf_keep) > 0:
+        score_strs = ["{:.0f}".format(s) for s in conf_keep]
+        name_root, ext = os.path.splitext(img_name)
+        new_name = name_root + "_conf_" + "_".join(score_strs) + ext
+    else:
+        new_name = img_name
+
+    save_path = os.path.join(OUTPUT_DIR, new_name)
+    cv2.imwrite(save_path, img)
+    print(f"{img_name} 推理完成，结果保存到: {save_path}")
+
+# ====================== Entry point ======================
+# Guarded so that importing this module does not start an inference run.
+if __name__ == "__main__":
+    detect_single_image(IMG_PATH)
+
--- a/detect_image/bag_judgment.py
+++ b/detect_image/bag_judgment.py
@ -0,0 +1,77 @@
+import cv2
+from detect_bag import detect_bag
+
+THRESHOLD_X = 537  # min_x threshold: at or above this value the bag counts as in position
+
+def bag_judgment(img, return_conf=True, return_vis=False):
+    """Decide whether the bag in `img` has reached its position.
+
+    Runs `detect_bag` and compares the left-most detected x coordinate with
+    ``THRESHOLD_X``.
+
+    Args:
+        img (np.ndarray): Image to inspect.
+        return_conf (bool): Forwarded to `detect_bag`; request the confidence.
+        return_vis (bool): Forwarded to `detect_bag`; request the visualisation.
+
+    Returns:
+        tuple: ``(status_bool, status_text, conf, min_x, vis_img)`` where
+            status_bool is True (in position), False (not in position) or
+            None (no bag detected); status_text is the matching Chinese
+            message; conf, min_x and vis_img are whatever `detect_bag`
+            supplied, or None when it supplied nothing for that slot.
+    """
+    outputs = detect_bag(img, return_conf=return_conf, return_vis=return_vis)
+
+    # Accept a bare scalar/None as well as a tuple or list.
+    if not isinstance(outputs, (tuple, list)):
+        outputs = (outputs,)
+    count = len(outputs)
+
+    conf = None
+    min_x = None
+    vis_img = None
+
+    # The layout of the result depends on the flags and on how many items
+    # detect_bag actually returned.
+    if return_conf:
+        if return_vis and count >= 3:
+            conf, min_x, vis_img = outputs[:3]
+        elif count >= 2:
+            conf, min_x = outputs[:2]
+        elif count == 1:
+            min_x = outputs[0]
+    elif return_vis:
+        if count >= 2:
+            min_x, vis_img = outputs[:2]
+        elif count == 1:
+            min_x = outputs[0]
+    elif count >= 1:
+        min_x = outputs[0]
+
+    # Map the left-most x coordinate to a status.
+    if min_x is None:
+        status_bool = None
+        status_text = "没有料袋"
+    elif min_x >= THRESHOLD_X:
+        status_bool = True
+        status_text = "料袋到位"
+    else:
+        status_bool = False
+        status_text = "料袋未到位"
+
+    return status_bool, status_text, conf, min_x, vis_img
+
+
+# ====================== Manual test ======================
+if __name__ == "__main__":
+    # Judge a single image from disk and print the outcome.
+    IMG_PATH = "3.jpg"
+    img = cv2.imread(IMG_PATH)
+    if img is None:
+        raise FileNotFoundError(f"图片无法读取: {IMG_PATH}")
+
+    status_bool, status_text, conf, min_x, vis_img = bag_judgment(img, return_conf=True, return_vis=True)
+    print(f"判断结果: {status_bool}, 中文状态: {status_text}, conf={conf}, min_x={min_x}")
+
+    # Show the visualisation only when one came back.
+    if vis_img is not None:
+        cv2.imshow("Vis", vis_img)
+        cv2.waitKey(0)
+        cv2.destroyAllWindows()
+
--- a/detect_image/capture-image_1.py
+++ b/detect_image/capture-image_1.py
@ -0,0 +1,202 @@
+import cv2
+import time
+import os
+import numpy as np
+from PIL import Image
+from skimage.metrics import structural_similarity as ssim
+import shutil
+from rknnlite.api import RKNNLite
+
+# ================== Configuration ==================
+# NOTE(review): the camera user name and password are embedded in this URL;
+# consider loading it from the environment instead of committing it.
+RTSP_URL = "rtsp://admin:ailaimiye123@192.168.0.234:554/streaming/channels/101"
+SAVE_INTERVAL = 15  # only every SAVE_INTERVAL-th frame is processed
+SSIM_THRESHOLD = 0.9  # frames more similar than this to the last kept frame are skipped
+OUTPUT_DIR = "camera_test"
+RKNN_MODEL = "bag3568.rknn"
+SHOW_WINDOW = False
+
+# Parameters of the gray-frame check
+GRAY_LOWER = 70
+GRAY_UPPER = 230
+GRAY_RATIO_THRESHOLD = 0.7
+
+IMG_SIZE = (640, 640)
+OBJ_THRESH = 0.001
+NMS_THRESH = 0.45
+CLASS_NAME = ["bag"]
+
+# The output directory is created as soon as the script starts.
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# ================== 灰度判断 ==================
+def is_large_gray(image):
+    """Tell whether most pixels have all three channels inside the gray band.
+
+    Args:
+        image: Anything `np.array` can convert to an array.
+
+    Returns:
+        True when the converted array is not H x W x 3, or when the fraction
+        of pixels whose three channels all lie in [GRAY_LOWER, GRAY_UPPER]
+        exceeds GRAY_RATIO_THRESHOLD.
+    """
+    pixels = np.array(image)
+    if pixels.ndim != 3 or pixels.shape[2] != 3:
+        return True
+    # A pixel counts only if every channel is inside the band.
+    in_band = ((pixels >= GRAY_LOWER) & (pixels <= GRAY_UPPER)).all(axis=2)
+    return in_band.sum() / in_band.size > GRAY_RATIO_THRESHOLD
+
+# ================== RKNN 工具函数 ==================
+def letterbox_resize(image, size, bg_color=114):
+    """Scale `image` to fit inside `size` keeping its aspect ratio, padding the rest.
+
+    Args:
+        image: Image array; only its first two shape entries (height, width) are read.
+        size: Target ``(width, height)``.
+        bg_color: Fill value used for the padded border.
+
+    Returns:
+        ``(canvas, scale, dx, dy)``: the padded uint8 canvas of shape
+        (height, width, 3), the resize factor, and the left/top offsets of
+        the resized content inside the canvas.
+    """
+    out_w, out_h = size
+    src_h, src_w = image.shape[:2]
+    # Largest factor that keeps both sides within the target.
+    scale = min(out_w / src_w, out_h / src_h)
+    fit_w = int(src_w * scale)
+    fit_h = int(src_h * scale)
+    # Centre the content; an odd leftover pixel ends up on the right/bottom.
+    dx = (out_w - fit_w) // 2
+    dy = (out_h - fit_h) // 2
+    shrunk = cv2.resize(image, (fit_w, fit_h))
+    canvas = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)
+    canvas[dy:dy + fit_h, dx:dx + fit_w] = shrunk
+    return canvas, scale, dx, dy
+
+def dfl_numpy(position):
+    """Turn per-side bin logits into expected bin indices (softmax expectation).
+
+    Args:
+        position: Array of shape (n, c, h, w). The channel axis is split into
+            4 groups of ``c // 4`` bins each.
+
+    Returns:
+        Array of shape (n, 4, h, w): for every group, the softmax-weighted
+        mean of the bin indices ``0 .. c // 4 - 1``.
+    """
+    n, c, h, w = position.shape
+    p_num = 4
+    mc = c // p_num
+    logits = position.reshape(n, p_num, mc, h, w)
+    # Subtracting the per-group maximum leaves the softmax unchanged but keeps
+    # np.exp from overflowing to inf (and the ratio from becoming NaN).
+    exp = np.exp(logits - logits.max(axis=2, keepdims=True))
+    probs = exp / exp.sum(axis=2, keepdims=True)
+    bins = np.arange(mc).reshape(1, 1, mc, 1, 1)
+    return np.sum(probs * bins, axis=2)
+
+def box_process(position):
+    """Convert one head's raw box output into corner boxes in model-input pixels.
+
+    Args:
+        position: Array of shape (n, c, grid_h, grid_w); decoded with `dfl_numpy`.
+
+    Returns:
+        Array of shape (n, 4, grid_h, grid_w) holding two corner points,
+        first the one obtained by subtracting distances, then the one
+        obtained by adding them.
+    """
+    grid_h, grid_w = position.shape[2:4]
+    xs, ys = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
+    # Channel 0 is the column index (x), channel 1 the row index (y).
+    cells = np.stack((xs, ys), axis=0)[np.newaxis]
+    stride = np.array([IMG_SIZE[1]//grid_h, IMG_SIZE[0]//grid_w]).reshape(1, 2, 1, 1)
+    dist = dfl_numpy(position)
+    # Distances are measured from the cell centre, in grid units.
+    centers = cells + 0.5
+    top_left = (centers - dist[:, 0:2, :, :]) * stride
+    bottom_right = (centers + dist[:, 2:4, :, :]) * stride
+    return np.concatenate((top_left, bottom_right), axis=1)
+
+def filter_boxes(boxes, box_confidences, box_class_probs):
+    """Tell whether any candidate scores at least ``OBJ_THRESH``.
+
+    The score of a candidate is its confidence times its best class
+    probability. No boxes are returned; only presence is reported.
+
+    Returns:
+        ``True`` if at least one candidate passes, otherwise ``None``.
+    """
+    # Result unused; the call still rejects a box array whose size is not a multiple of 4.
+    boxes.reshape(-1, 4)
+    confidences = box_confidences.reshape(-1)
+    best_prob = np.array(box_class_probs).max(axis=-1)
+    passed = confidences * best_prob >= OBJ_THRESH
+    return True if passed.any() else None
+
+def post_process(outputs, scale, dx, dy):
+    """Flatten the raw model outputs and hand them to `filter_boxes`.
+
+    Args:
+        outputs: Sequence read at indices 0..8 as three consecutive
+            (box, class, confidence) triples.
+        scale, dx, dy: Accepted but not used by this function.
+
+    Returns:
+        Whatever `filter_boxes` returns for the flattened candidates.
+    """
+    def to_rows(t):
+        # (n, c, h, w) -> (n*h*w, c)
+        channels = t.shape[1]
+        return t.transpose(0, 2, 3, 1).reshape(-1, channels)
+
+    heads = range(0, 9, 3)
+    boxes = np.concatenate([to_rows(box_process(outputs[k])) for k in heads])
+    box_conf = np.concatenate([to_rows(outputs[k + 2]) for k in heads])
+    class_probs = np.concatenate([to_rows(outputs[k + 1]) for k in heads])
+    return filter_boxes(boxes, box_conf, class_probs)
+
+# ================== RKNN initialisation ==================
+# The model is loaded and the runtime started once; a non-zero return code aborts the script.
+rknn = RKNNLite()
+if rknn.load_rknn(RKNN_MODEL) != 0:
+    raise RuntimeError("❌ RKNN 模型加载失败")
+if rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO) != 0:
+    raise RuntimeError("❌ RKNN Runtime 初始化失败")
+print("✅ RKNN 初始化完成")
+
+# ================== Video stream processing ==================
+max_retry_seconds = 10
+retry_interval_seconds = 1
+
+last_gray = None
+frame_count = 0
+
+# Outer loop: (re)connect to the stream. Inner loop: read frames until a read fails.
+while True:
+    cap = cv2.VideoCapture(RTSP_URL)
+    start_time = time.time()
+
+    # Retry opening the stream; give up and exit after max_retry_seconds.
+    while not cap.isOpened():
+        if time.time() - start_time >= max_retry_seconds:
+            print("❌ 无法连接 RTSP")
+            exit(1)
+        time.sleep(retry_interval_seconds)
+        cap = cv2.VideoCapture(RTSP_URL)
+
+    print("✅ 开始读取视频流")
+
+    try:
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                # Leave the inner loop; the outer loop opens a new capture.
+                print("❌ 读取失败")
+                break
+
+            frame_count += 1
+
+            if SHOW_WINDOW:
+                cv2.imshow("Camera", frame)
+                if cv2.waitKey(1) == ord('q'):
+                    raise KeyboardInterrupt
+
+            # Only every SAVE_INTERVAL-th frame goes through the pipeline below.
+            if frame_count % SAVE_INTERVAL != 0:
+                continue
+
+            print(f"处理帧 {frame_count}")
+
+            # STEP1: gray filter
+            pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+            if is_large_gray(pil_image):
+                print("跳过：大面积灰色")
+                continue
+
+            # STEP2: SSIM de-duplication against the last frame that passed this step
+            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+            if last_gray is not None:
+                sim = ssim(gray, last_gray)
+                if sim > SSIM_THRESHOLD:
+                    print(f"跳过：SSIM={sim:.3f}")
+                    continue
+            # Updated before inference, so a frame without a bag still becomes the reference.
+            last_gray = gray.copy()
+
+            # STEP3: RKNN inference, only to decide whether a bag is present
+            img_resized, scale, dx, dy = letterbox_resize(frame, IMG_SIZE)
+            input_data = np.expand_dims(img_resized, 0)
+            outputs = rknn.inference(inputs=[input_data])
+            has_bag = post_process(outputs, scale, dx, dy)
+            if not has_bag:
+                print("跳过：未检测到 bag")
+                continue
+
+            # STEP4: disk check, stop when less than 5 GiB is free
+            _, _, free = shutil.disk_usage(OUTPUT_DIR)
+            if free < 5*1024**3:
+                print("❌ 磁盘空间不足")
+                # NOTE(review): SystemExit passes through the finally below, but the
+                # rknn.release() at the end of the script is never reached on this path.
+                raise SystemExit(1)
+
+            # STEP5: save the original frame under a timestamped name
+            ts = time.strftime("%Y%m%d_%H%M%S")
+            ms = int((time.time()%1)*1000)
+            filename = f"bag_{ts}_{ms:03d}.png"
+            path = os.path.join(OUTPUT_DIR, filename)
+            cv2.imwrite(path, frame)  # save the original frame
+            print(f"✅ 已保存: {path}")
+
+    except KeyboardInterrupt:
+        print("\n🛑 用户中断")
+        break
+
+    finally:
+        # Runs on read failure, user interrupt and SystemExit alike.
+        cap.release()
+        cv2.destroyAllWindows()
+        print(f"视频流关闭，共处理 {frame_count} 帧")
+
+rknn.release()
+print("程序结束")
+
--- a/detect_image/capture-image_2.py
+++ b/detect_image/capture-image_2.py
@ -0,0 +1,205 @@
+import cv2
+import time
+import os
+import numpy as np
+from PIL import Image
+from skimage.metrics import structural_similarity as ssim
+from rknnlite.api import RKNNLite
+
+# ================== Configuration ==================
+# NOTE(review): the camera user name and password are embedded in this URL;
+# consider loading it from the environment instead of committing it.
+RTSP_URL = "rtsp://admin:ailaimiye123@192.168.0.234:554/streaming/channels/101"
+RKNN_MODEL = "bag3568.rknn"
+OUTPUT_DIR = "camera_event_capture"
+
+# NOTE(review): CONF_THRESHOLD is not referenced anywhere else in this script.
+CONF_THRESHOLD = 0.5
+SSIM_THRESHOLD = 0.9
+
+END_MISS_FRAMES = 30        # consecutive processed frames without a detection that end a capture session
+SAVE_EVERY_N_FRAMES = 1     # while capturing, save every N-th frame
+SHOW_WINDOW = False
+
+# Parameters of the gray-frame check
+GRAY_LOWER = 70
+GRAY_UPPER = 230
+GRAY_RATIO_THRESHOLD = 0.7
+
+IMG_SIZE = (640, 640)
+OBJ_THRESH = 0.001
+NMS_THRESH = 0.45
+CLASS_NAME = ["bag"]
+
+# The output directory is created as soon as the script starts.
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# ================== 灰度判断 ==================
+def is_large_gray(image):
+    """Tell whether most pixels have all three channels inside the gray band.
+
+    Args:
+        image: Anything `np.array` can convert to an array.
+
+    Returns:
+        True when the converted array is not H x W x 3, or when the fraction
+        of pixels whose three channels all lie in [GRAY_LOWER, GRAY_UPPER]
+        exceeds GRAY_RATIO_THRESHOLD.
+    """
+    pixels = np.array(image)
+    if pixels.ndim != 3 or pixels.shape[2] != 3:
+        return True
+    # A pixel counts only if every channel is inside the band.
+    in_band = ((pixels >= GRAY_LOWER) & (pixels <= GRAY_UPPER)).all(axis=2)
+    return in_band.sum() / in_band.size > GRAY_RATIO_THRESHOLD
+
+# ================== RKNN 推理工具 ==================
+def letterbox_resize(image, size, bg_color=114):
+    """Scale `image` to fit inside `size` keeping its aspect ratio, padding the rest.
+
+    Args:
+        image: Image array; only its first two shape entries (height, width) are read.
+        size: Target ``(width, height)``.
+        bg_color: Fill value used for the padded border.
+
+    Returns:
+        ``(canvas, scale, dx, dy)``: the padded uint8 canvas of shape
+        (height, width, 3), the resize factor, and the left/top offsets of
+        the resized content inside the canvas.
+    """
+    out_w, out_h = size
+    src_h, src_w = image.shape[:2]
+    # Largest factor that keeps both sides within the target.
+    scale = min(out_w / src_w, out_h / src_h)
+    fit_w = int(src_w * scale)
+    fit_h = int(src_h * scale)
+    # Centre the content; an odd leftover pixel ends up on the right/bottom.
+    dx = (out_w - fit_w) // 2
+    dy = (out_h - fit_h) // 2
+    shrunk = cv2.resize(image, (fit_w, fit_h))
+    canvas = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)
+    canvas[dy:dy + fit_h, dx:dx + fit_w] = shrunk
+    return canvas, scale, dx, dy
+
+def dfl_numpy(position):
+    """Turn per-side bin logits into expected bin indices (softmax expectation).
+
+    Args:
+        position: Array of shape (n, c, h, w). The channel axis is split into
+            4 groups of ``c // 4`` bins each.
+
+    Returns:
+        Array of shape (n, 4, h, w): for every group, the softmax-weighted
+        mean of the bin indices ``0 .. c // 4 - 1``.
+    """
+    n, c, h, w = position.shape
+    p_num = 4
+    mc = c // p_num
+    logits = position.reshape(n, p_num, mc, h, w)
+    # Subtracting the per-group maximum leaves the softmax unchanged but keeps
+    # np.exp from overflowing to inf (and the ratio from becoming NaN).
+    exp = np.exp(logits - logits.max(axis=2, keepdims=True))
+    probs = exp / exp.sum(axis=2, keepdims=True)
+    bins = np.arange(mc).reshape(1, 1, mc, 1, 1)
+    return np.sum(probs * bins, axis=2)
+
+def box_process(position):
+    """Convert one head's raw box output into corner boxes in model-input pixels.
+
+    Args:
+        position: Array of shape (n, c, grid_h, grid_w); decoded with `dfl_numpy`.
+
+    Returns:
+        Array of shape (n, 4, grid_h, grid_w) holding two corner points,
+        first the one obtained by subtracting distances, then the one
+        obtained by adding them.
+    """
+    grid_h, grid_w = position.shape[2:4]
+    xs, ys = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
+    # Channel 0 is the column index (x), channel 1 the row index (y).
+    cells = np.stack((xs, ys), axis=0)[np.newaxis]
+    stride = np.array([IMG_SIZE[1]//grid_h, IMG_SIZE[0]//grid_w]).reshape(1, 2, 1, 1)
+    dist = dfl_numpy(position)
+    # Distances are measured from the cell centre, in grid units.
+    centers = cells + 0.5
+    top_left = (centers - dist[:, 0:2, :, :]) * stride
+    bottom_right = (centers + dist[:, 2:4, :, :]) * stride
+    return np.concatenate((top_left, bottom_right), axis=1)
+
+def filter_boxes(boxes, box_confidences, box_class_probs):
+    """Tell whether any candidate scores at least ``OBJ_THRESH``.
+
+    The score of a candidate is its confidence times its best class
+    probability. No boxes are returned; only presence is reported.
+
+    Returns:
+        A truthy value if at least one candidate passes, a falsy one otherwise.
+    """
+    # Result unused; the call still rejects a box array whose size is not a multiple of 4.
+    boxes.reshape(-1, 4)
+    confidences = box_confidences.reshape(-1)
+    best_prob = np.array(box_class_probs).max(axis=-1)
+    passed = confidences * best_prob >= OBJ_THRESH
+    return np.any(passed)  # True: bag present, False: no bag
+
+def post_process(outputs, scale, dx, dy):
+    """Flatten the raw model outputs and hand them to `filter_boxes`.
+
+    Args:
+        outputs: Sequence read at indices 0..8 as three consecutive
+            (box, class, confidence) triples.
+        scale, dx, dy: Accepted but not used by this function.
+
+    Returns:
+        Whatever `filter_boxes` returns for the flattened candidates.
+    """
+    def to_rows(t):
+        # (n, c, h, w) -> (n*h*w, c)
+        channels = t.shape[1]
+        return t.transpose(0, 2, 3, 1).reshape(-1, channels)
+
+    heads = range(0, 9, 3)
+    boxes = np.concatenate([to_rows(box_process(outputs[k])) for k in heads])
+    box_conf = np.concatenate([to_rows(outputs[k + 2]) for k in heads])
+    class_probs = np.concatenate([to_rows(outputs[k + 1]) for k in heads])
+    return filter_boxes(boxes, box_conf, class_probs)
+
+# ================== RKNN initialisation ==================
+# The model is loaded and the runtime started once; a non-zero return code aborts the script.
+rknn = RKNNLite()
+if rknn.load_rknn(RKNN_MODEL) != 0:
+    raise RuntimeError("RKNN 模型加载失败")
+if rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO) != 0:
+    raise RuntimeError("RKNN Runtime 初始化失败")
+print("✅ RKNN 初始化完成")
+
+# ================== Video stream ==================
+cap = cv2.VideoCapture(RTSP_URL)
+if not cap.isOpened():
+    raise RuntimeError("RTSP 连接失败")
+print("🎥 视频流已连接")
+
+# ================== State machine ==================
+# IDLE waits for a detection; CAPTURING saves frames until END_MISS_FRAMES
+# consecutive processed frames contain no detection.
+STATE_IDLE = 0
+STATE_CAPTURING = 1
+
+state = STATE_IDLE
+miss_count = 0
+save_idx = 0
+session_dir = None
+session_id = 0
+last_gray = None
+frame_count = 0
+
+try:
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            # NOTE(review): a failed read only sleeps and retries on the same
+            # capture object; the stream is never reopened.
+            time.sleep(0.5)
+            continue
+
+        frame_count += 1
+
+        if SHOW_WINDOW:
+            cv2.imshow("Camera", frame)
+            if cv2.waitKey(1) == ord('q'):
+                break
+
+        # ---------- gray filter ----------
+        # Frames rejected here never reach the state machine, so they change neither miss_count nor save_idx.
+        pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+        if is_large_gray(pil_image):
+            continue
+
+        # ---------- SSIM de-duplication (applied only while idle) ----------
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        if last_gray is not None and state == STATE_IDLE:
+            sim = ssim(gray, last_gray)
+            if sim > SSIM_THRESHOLD:
+                continue
+        last_gray = gray.copy()
+
+        # ---------- RKNN inference: is a bag present? ----------
+        img_resized, scale, dx, dy = letterbox_resize(frame, IMG_SIZE)
+        input_data = np.expand_dims(img_resized, 0)
+        outputs = rknn.inference(inputs=[input_data])
+        has_bag = post_process(outputs, scale, dx, dy)
+
+        # ---------- state machine ----------
+        if state == STATE_IDLE:
+            if has_bag:
+                # Start a new session in its own timestamped directory.
+                # The triggering frame itself is not saved; saving starts with the next processed frame.
+                session_id += 1
+                ts = time.strftime("%Y%m%d_%H%M%S")
+                session_dir = os.path.join(OUTPUT_DIR, f"session_{session_id:04d}_{ts}")
+                os.makedirs(session_dir, exist_ok=True)
+                print(f"\n🚀 进入采集")
+                state = STATE_CAPTURING
+                miss_count = 0
+                save_idx = 0
+
+        elif state == STATE_CAPTURING:
+            if has_bag:
+                miss_count = 0
+            else:
+                miss_count += 1
+
+            # Frames are saved whether or not this particular frame contains a bag.
+            if save_idx % SAVE_EVERY_N_FRAMES == 0:
+                ts = time.strftime("%Y%m%d_%H%M%S")
+                ms = int((time.time()%1)*1000)
+                fname = f"{save_idx:06d}_{ts}_{ms:03d}.png"
+                cv2.imwrite(os.path.join(session_dir, fname), frame)  # save the original frame
+            save_idx += 1
+
+            if miss_count >= END_MISS_FRAMES:
+                print(f"🛑 退出采集，本次保存 {save_idx} 帧")
+                state = STATE_IDLE
+                miss_count = 0
+                session_dir = None
+
+except KeyboardInterrupt:
+    print("\n🛑 用户退出")
+
+finally:
+    # Release the capture, any windows and the NPU runtime on every exit path.
+    cap.release()
+    cv2.destroyAllWindows()
+    rknn.release()
+    print("程序结束")
+
--- a/detect_image/detect_bag.py
+++ b/detect_image/detect_bag.py
@ -0,0 +1,181 @@
+import os
+import cv2
+import numpy as np
+from rknnlite.api import RKNNLite
+
+# ====================== Configuration ======================
+MODEL_PATH = "bag3588.rknn"
+IMG_PATH = "2.jpg"  # image used by the __main__ test at the bottom of this file
+IMG_SIZE = (640, 640)
+OBJ_THRESH = 0.001
+NMS_THRESH = 0.45
+CLASS_NAME = ["bag"]
+OUTPUT_DIR = "./result"
+# The output directory is created as soon as the module is imported.
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# ====================== Global RKNN handle ======================
+# Set by init_rknn() on first use and reused by later calls.
+_global_rknn = None
+
+def init_rknn(model_path):
+    """Return the module-wide RKNNLite instance, creating it on the first call.
+
+    Args:
+        model_path: Model file to load. Only used by the call that creates
+            the instance; later calls return the cached instance unchanged.
+
+    Returns:
+        The initialised RKNNLite instance.
+
+    Raises:
+        RuntimeError: If loading the model or starting the runtime reports a
+            non-zero return code.
+    """
+    global _global_rknn
+    if _global_rknn is None:
+        rknn = RKNNLite(verbose=False)
+        if rknn.load_rknn(model_path) != 0:
+            raise RuntimeError(f"RKNN 模型加载失败: {model_path}")
+        if rknn.init_runtime() != 0:
+            raise RuntimeError("RKNN Runtime 初始化失败")
+        # Cache only a fully initialised instance so a failed attempt can be retried.
+        _global_rknn = rknn
+    return _global_rknn
+
+# ====================== 工具函数 ======================
+def letterbox_resize(image, size, bg_color=114):
+    """Scale `image` to fit inside `size` keeping its aspect ratio, padding the rest.
+
+    Args:
+        image: Image array; only its first two shape entries (height, width) are read.
+        size: Target ``(width, height)``.
+        bg_color: Fill value used for the padded border.
+
+    Returns:
+        ``(canvas, scale, dx, dy)``: the padded uint8 canvas of shape
+        (height, width, 3), the resize factor, and the left/top offsets of
+        the resized content inside the canvas.
+    """
+    out_w, out_h = size
+    src_h, src_w = image.shape[:2]
+    # Largest factor that keeps both sides within the target.
+    scale = min(out_w / src_w, out_h / src_h)
+    fit_w = int(src_w * scale)
+    fit_h = int(src_h * scale)
+    # Centre the content; an odd leftover pixel ends up on the right/bottom.
+    dx = (out_w - fit_w) // 2
+    dy = (out_h - fit_h) // 2
+    shrunk = cv2.resize(image, (fit_w, fit_h))
+    canvas = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)
+    canvas[dy:dy + fit_h, dx:dx + fit_w] = shrunk
+    return canvas, scale, dx, dy
+
+def dfl_numpy(position):
+    """Turn per-side bin logits into expected bin indices (softmax expectation).
+
+    Args:
+        position: Array of shape (n, c, h, w). The channel axis is split into
+            4 groups of ``c // 4`` bins each.
+
+    Returns:
+        Array of shape (n, 4, h, w): for every group, the softmax-weighted
+        mean of the bin indices ``0 .. c // 4 - 1``.
+    """
+    n, c, h, w = position.shape
+    p_num = 4
+    mc = c // p_num
+    logits = position.reshape(n, p_num, mc, h, w)
+    # Subtracting the per-group maximum leaves the softmax unchanged but keeps
+    # np.exp from overflowing to inf (and the ratio from becoming NaN).
+    exp = np.exp(logits - logits.max(axis=2, keepdims=True))
+    probs = exp / exp.sum(axis=2, keepdims=True)
+    bins = np.arange(mc).reshape(1, 1, mc, 1, 1)
+    return np.sum(probs * bins, axis=2)
+
+def box_process(position):
+    """Convert one head's raw box output into corner boxes in model-input pixels.
+
+    Args:
+        position: Array of shape (n, c, grid_h, grid_w); decoded with `dfl_numpy`.
+
+    Returns:
+        Array of shape (n, 4, grid_h, grid_w) holding (x1, y1, x2, y2).
+    """
+    grid_h, grid_w = position.shape[2:4]
+    xs, ys = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
+    # Channel 0 is the column index (x), channel 1 the row index (y).
+    cells = np.stack((xs, ys), axis=0)[np.newaxis]
+    stride = np.array([IMG_SIZE[1] // grid_h, IMG_SIZE[0] // grid_w]).reshape(1, 2, 1, 1)
+    dist = dfl_numpy(position)
+    # Distances are measured from the cell centre, in grid units.
+    centers = cells + 0.5
+    top_left = (centers - dist[:, 0:2, :, :]) * stride
+    bottom_right = (centers + dist[:, 2:4, :, :]) * stride
+    return np.concatenate((top_left, bottom_right), axis=1)
+
+def filter_boxes(boxes, box_confidences, box_class_probs):
+    """Threshold candidate boxes by score and suppress overlapping ones.
+
+    Args:
+        boxes: Candidate boxes, reshaped to (-1, 4) as (x1, y1, x2, y2).
+        box_confidences: Per-candidate confidence, flattened to 1-D.
+        box_class_probs: Per-candidate class probabilities, one row per candidate.
+
+    Returns:
+        ``(boxes, classes, scores, conf_keep)`` for the surviving candidates,
+        ordered by descending score, or four ``None`` values when no candidate
+        reaches ``OBJ_THRESH``.
+    """
+    boxes = np.array(boxes).reshape(-1, 4)
+    box_confidences = np.array(box_confidences).reshape(-1)
+    box_class_probs = np.array(box_class_probs)
+    
+    # Score = confidence * probability of the best class.
+    class_ids = np.argmax(box_class_probs, axis=-1)
+    class_scores = box_class_probs[np.arange(len(class_ids)), class_ids]
+    scores = box_confidences * class_scores
+
+    mask = scores >= OBJ_THRESH
+    if np.sum(mask) == 0:
+        return None, None, None, None
+
+    boxes = boxes[mask]
+    classes = class_ids[mask]
+    scores = scores[mask]
+    # Raw confidence, before multiplying by the class score.
+    conf_keep = box_confidences[mask]
+
+    # Greedy NMS across all classes: keep the best box, drop every remaining
+    # box whose IoU with it exceeds NMS_THRESH, repeat on what is left.
+    x1, y1, x2, y2 = boxes[:,0], boxes[:,1], boxes[:,2], boxes[:,3]
+    # Widths/heights use the inclusive-pixel "+1" convention.
+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    order = scores.argsort()[::-1]
+    keep = []
+    while order.size > 0:
+        i = order[0]
+        keep.append(i)
+        xx1 = np.maximum(x1[i], x1[order[1:]])
+        yy1 = np.maximum(y1[i], y1[order[1:]])
+        xx2 = np.minimum(x2[i], x2[order[1:]])
+        yy2 = np.minimum(y2[i], y2[order[1:]])
+        w = np.maximum(0, xx2 - xx1 + 1)
+        h = np.maximum(0, yy2 - yy1 + 1)
+        inter = w * h
+        ovr = inter / (areas[i] + areas[order[1:]] - inter)
+        inds = np.where(ovr <= NMS_THRESH)[0]
+        # inds indexes order[1:], hence the +1 to map back into order.
+        order = order[inds + 1]
+    return boxes[keep], classes[keep], scores[keep], conf_keep[keep]
+
+def post_process(outputs, scale, dx, dy):
+    """Decode raw model outputs into boxes in original-image coordinates.
+
+    Args:
+        outputs: Sequence read at indices 0..8 as three consecutive
+            (box, class, confidence) triples.
+        scale: Letterbox resize factor.
+        dx: Horizontal letterbox padding in pixels.
+        dy: Vertical letterbox padding in pixels.
+
+    Returns:
+        ``(boxes, classes, scores, conf_keep)``, or four ``None`` values when
+        `filter_boxes` keeps nothing. `scores` is returned as ``1 - score``
+        and `conf_keep` is multiplied by 255.
+    """
+    def to_rows(t):
+        # (n, c, h, w) -> (n*h*w, c)
+        return t.transpose(0, 2, 3, 1).reshape(-1, t.shape[1])
+
+    heads = range(0, 9, 3)
+    boxes = np.concatenate([to_rows(box_process(outputs[k])) for k in heads])
+    box_conf = np.concatenate([to_rows(outputs[k + 2]) for k in heads])
+    class_probs = np.concatenate([to_rows(outputs[k + 1]) for k in heads])
+
+    boxes, classes, scores, conf_keep = filter_boxes(boxes, box_conf, class_probs)
+    if boxes is None:
+        return None, None, None, None
+
+    # Undo the letterbox: remove the padding, then rescale to the source image.
+    boxes[:, 0::2] -= dx
+    boxes[:, 1::2] -= dy
+    boxes /= scale
+    boxes = boxes.clip(min=0)
+
+    return boxes, classes, 1 - scores, conf_keep * 255
+
+# ====================== detect_bag ======================
+def detect_bag(img, return_conf=True, return_vis=False):
+    """Detect bags in `img` and report the left-most box edge.
+
+    Args:
+        img: Image array to run the detector on.
+        return_conf: Selects the return layout (see below).
+        return_vis: When true, draw the detections on a copy of `img` and
+            write it to ``OUTPUT_DIR/vis_result.jpg``.
+
+    Returns:
+        With `return_conf` true: ``(conf, min_x)``, or ``(None, None)`` when
+        nothing is detected. `conf` is the maximum of the scores produced by
+        `post_process`; the drawn image is saved but not returned.
+        With `return_conf` false: ``(min_x, vis_img)``, or the 1-tuple
+        ``(None,)`` when nothing is detected. `vis_img` is None unless
+        `return_vis` is true.
+    """
+    runtime = init_rknn(MODEL_PATH)
+
+    padded, scale, dx, dy = letterbox_resize(img, IMG_SIZE)
+    outputs = runtime.inference(inputs=[np.expand_dims(padded, 0)])
+    boxes, classes, scores, _ = post_process(outputs, scale, dx, dy)
+
+    # Nothing detected: the arity of the result depends on return_conf.
+    if boxes is None or len(boxes) == 0:
+        if return_conf:
+            return None, None
+        return (None,)
+
+    min_x = float(boxes[:, 0].min())
+    vis_img = None
+
+    if return_vis:
+        vis_img = img.copy()
+        for box, cls_id, score in zip(boxes, classes, scores):
+            left, top, right, bottom = box.astype(int)
+            label = f"{CLASS_NAME[cls_id]}:{score:.1f}"
+            cv2.rectangle(vis_img, (left, top), (right, bottom), (0, 255, 0), 2)
+            cv2.putText(vis_img, label, (left, max(top - 5, 0)),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
+        # Always the same file name, so each call overwrites the previous image.
+        cv2.imwrite(os.path.join(OUTPUT_DIR, "vis_" + "result.jpg"), vis_img)
+
+    if not return_conf:
+        return min_x, vis_img
+    return float(scores.max()), min_x
+
+# ====================== Manual test ======================
+if __name__ == "__main__":
+    img = cv2.imread(IMG_PATH)
+    if img is None:
+        raise FileNotFoundError(f"图片无法读取: {IMG_PATH}")
+
+    # With return_conf=True the call returns exactly two values, even though
+    # return_vis=True also makes it write the annotated image to disk.
+    conf, min_x = detect_bag(img, return_conf=True, return_vis=True)
+
+    if conf is None:
+        print("❌ 未检测到 bag")
+    else:
+        print(f"✅ 最大置信度: {conf:.4f}, 最左 x: {min_x:.1f}")
+
--- a/detect_image/image_01_3588.py
+++ b/detect_image/image_01_3588.py
@ -0,0 +1,202 @@
+import cv2
+import time
+import os
+import numpy as np
+from PIL import Image
+from skimage.metrics import structural_similarity as ssim
+import shutil
+from rknnlite.api import RKNNLite
+
+# ================== Configuration ==================
+# NOTE(review): the camera user name and password are embedded in this URL;
+# consider loading it from the environment instead of committing it.
+RTSP_URL = "rtsp://admin:XJ123456@192.168.250.60:554/streaming/channels/101"
+SAVE_INTERVAL = 15  # only every SAVE_INTERVAL-th frame is processed
+SSIM_THRESHOLD = 0.9  # frames more similar than this to the last kept frame are skipped
+OUTPUT_DIR = "camera_test"
+RKNN_MODEL = "bag3588.rknn"
+SHOW_WINDOW = False
+
+# Parameters of the gray-frame check
+GRAY_LOWER = 70
+GRAY_UPPER = 230
+GRAY_RATIO_THRESHOLD = 0.7
+
+IMG_SIZE = (640, 640)
+OBJ_THRESH = 0.001
+NMS_THRESH = 0.45
+CLASS_NAME = ["bag"]
+
+# The output directory is created as soon as the script starts.
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# ================== 灰度判断 ==================
+def is_large_gray(image):
+    """Tell whether most pixels have all three channels inside the gray band.
+
+    Args:
+        image: Anything `np.array` can convert to an array.
+
+    Returns:
+        True when the converted array is not H x W x 3, or when the fraction
+        of pixels whose three channels all lie in [GRAY_LOWER, GRAY_UPPER]
+        exceeds GRAY_RATIO_THRESHOLD.
+    """
+    pixels = np.array(image)
+    if pixels.ndim != 3 or pixels.shape[2] != 3:
+        return True
+    # A pixel counts only if every channel is inside the band.
+    in_band = ((pixels >= GRAY_LOWER) & (pixels <= GRAY_UPPER)).all(axis=2)
+    return in_band.sum() / in_band.size > GRAY_RATIO_THRESHOLD
+
+# ================== RKNN 工具函数 ==================
+def letterbox_resize(image, size, bg_color=114):
+    """Scale `image` to fit inside `size` keeping its aspect ratio, padding the rest.
+
+    Args:
+        image: Image array; only its first two shape entries (height, width) are read.
+        size: Target ``(width, height)``.
+        bg_color: Fill value used for the padded border.
+
+    Returns:
+        ``(canvas, scale, dx, dy)``: the padded uint8 canvas of shape
+        (height, width, 3), the resize factor, and the left/top offsets of
+        the resized content inside the canvas.
+    """
+    out_w, out_h = size
+    src_h, src_w = image.shape[:2]
+    # Largest factor that keeps both sides within the target.
+    scale = min(out_w / src_w, out_h / src_h)
+    fit_w = int(src_w * scale)
+    fit_h = int(src_h * scale)
+    # Centre the content; an odd leftover pixel ends up on the right/bottom.
+    dx = (out_w - fit_w) // 2
+    dy = (out_h - fit_h) // 2
+    shrunk = cv2.resize(image, (fit_w, fit_h))
+    canvas = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)
+    canvas[dy:dy + fit_h, dx:dx + fit_w] = shrunk
+    return canvas, scale, dx, dy
+
+def dfl_numpy(position):
+    """Turn per-side bin logits into expected bin indices (softmax expectation).
+
+    Args:
+        position: Array of shape (n, c, h, w). The channel axis is split into
+            4 groups of ``c // 4`` bins each.
+
+    Returns:
+        Array of shape (n, 4, h, w): for every group, the softmax-weighted
+        mean of the bin indices ``0 .. c // 4 - 1``.
+    """
+    n, c, h, w = position.shape
+    p_num = 4
+    mc = c // p_num
+    logits = position.reshape(n, p_num, mc, h, w)
+    # Subtracting the per-group maximum leaves the softmax unchanged but keeps
+    # np.exp from overflowing to inf (and the ratio from becoming NaN).
+    exp = np.exp(logits - logits.max(axis=2, keepdims=True))
+    probs = exp / exp.sum(axis=2, keepdims=True)
+    bins = np.arange(mc).reshape(1, 1, mc, 1, 1)
+    return np.sum(probs * bins, axis=2)
+
+def box_process(position):
+    """Convert one head's raw box output into corner boxes in model-input pixels.
+
+    Args:
+        position: Array of shape (n, c, grid_h, grid_w); decoded with `dfl_numpy`.
+
+    Returns:
+        Array of shape (n, 4, grid_h, grid_w) holding two corner points,
+        first the one obtained by subtracting distances, then the one
+        obtained by adding them.
+    """
+    grid_h, grid_w = position.shape[2:4]
+    xs, ys = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
+    # Channel 0 is the column index (x), channel 1 the row index (y).
+    cells = np.stack((xs, ys), axis=0)[np.newaxis]
+    stride = np.array([IMG_SIZE[1]//grid_h, IMG_SIZE[0]//grid_w]).reshape(1, 2, 1, 1)
+    dist = dfl_numpy(position)
+    # Distances are measured from the cell centre, in grid units.
+    centers = cells + 0.5
+    top_left = (centers - dist[:, 0:2, :, :]) * stride
+    bottom_right = (centers + dist[:, 2:4, :, :]) * stride
+    return np.concatenate((top_left, bottom_right), axis=1)
+
+def filter_boxes(boxes, box_confidences, box_class_probs):
+    """Tell whether any candidate scores at least ``OBJ_THRESH``.
+
+    The score of a candidate is its confidence times its best class
+    probability. No boxes are returned; only presence is reported.
+
+    Returns:
+        ``True`` if at least one candidate passes, otherwise ``None``.
+    """
+    # Result unused; the call still rejects a box array whose size is not a multiple of 4.
+    boxes.reshape(-1, 4)
+    confidences = box_confidences.reshape(-1)
+    best_prob = np.array(box_class_probs).max(axis=-1)
+    passed = confidences * best_prob >= OBJ_THRESH
+    return True if passed.any() else None
+
+def post_process(outputs, scale, dx, dy):
+    """Flatten the raw model outputs and hand them to `filter_boxes`.
+
+    Args:
+        outputs: Sequence read at indices 0..8 as three consecutive
+            (box, class, confidence) triples.
+        scale, dx, dy: Accepted but not used by this function.
+
+    Returns:
+        Whatever `filter_boxes` returns for the flattened candidates.
+    """
+    def to_rows(t):
+        # (n, c, h, w) -> (n*h*w, c)
+        channels = t.shape[1]
+        return t.transpose(0, 2, 3, 1).reshape(-1, channels)
+
+    heads = range(0, 9, 3)
+    boxes = np.concatenate([to_rows(box_process(outputs[k])) for k in heads])
+    box_conf = np.concatenate([to_rows(outputs[k + 2]) for k in heads])
+    class_probs = np.concatenate([to_rows(outputs[k + 1]) for k in heads])
+    return filter_boxes(boxes, box_conf, class_probs)
+
+# ================== RKNN initialisation ==================
+# The model is loaded and the runtime started once; a non-zero return code aborts the script.
+rknn = RKNNLite()
+if rknn.load_rknn(RKNN_MODEL) != 0:
+    raise RuntimeError("❌ RKNN 模型加载失败")
+if rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO) != 0:
+    raise RuntimeError("❌ RKNN Runtime 初始化失败")
+print("✅ RKNN 初始化完成")
+
+# ================== Video stream processing ==================
+max_retry_seconds = 10
+retry_interval_seconds = 1
+
+last_gray = None
+frame_count = 0
+
+# Outer loop: (re)connect to the stream. Inner loop: read frames until a read fails.
+while True:
+    cap = cv2.VideoCapture(RTSP_URL)
+    start_time = time.time()
+
+    # Retry opening the stream; give up and exit after max_retry_seconds.
+    while not cap.isOpened():
+        if time.time() - start_time >= max_retry_seconds:
+            print("❌ 无法连接 RTSP")
+            exit(1)
+        time.sleep(retry_interval_seconds)
+        cap = cv2.VideoCapture(RTSP_URL)
+
+    print("✅ 开始读取视频流")
+
+    try:
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                # Leave the inner loop; the outer loop opens a new capture.
+                print("❌ 读取失败")
+                break
+
+            frame_count += 1
+
+            if SHOW_WINDOW:
+                cv2.imshow("Camera", frame)
+                if cv2.waitKey(1) == ord('q'):
+                    raise KeyboardInterrupt
+
+            # Only every SAVE_INTERVAL-th frame goes through the pipeline below.
+            if frame_count % SAVE_INTERVAL != 0:
+                continue
+
+            print(f"处理帧 {frame_count}")
+
+            # STEP1: gray filter
+            pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+            if is_large_gray(pil_image):
+                print("跳过：大面积灰色")
+                continue
+
+            # STEP2: SSIM de-duplication against the last frame that passed this step
+            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+            if last_gray is not None:
+                sim = ssim(gray, last_gray)
+                if sim > SSIM_THRESHOLD:
+                    print(f"跳过：SSIM={sim:.3f}")
+                    continue
+            # Updated before inference, so a frame without a bag still becomes the reference.
+            last_gray = gray.copy()
+
+            # STEP3: RKNN inference, only to decide whether a bag is present
+            img_resized, scale, dx, dy = letterbox_resize(frame, IMG_SIZE)
+            input_data = np.expand_dims(img_resized, 0)
+            outputs = rknn.inference(inputs=[input_data])
+            has_bag = post_process(outputs, scale, dx, dy)
+            if not has_bag:
+                print("跳过：未检测到 bag")
+                continue
+
+            # STEP4: disk check, stop when less than 5 GiB is free
+            _, _, free = shutil.disk_usage(OUTPUT_DIR)
+            if free < 5*1024**3:
+                print("❌ 磁盘空间不足")
+                # NOTE(review): SystemExit passes through the finally below, but the
+                # rknn.release() at the end of the script is never reached on this path.
+                raise SystemExit(1)
+
+            # STEP5: save the original frame under a timestamped name
+            ts = time.strftime("%Y%m%d_%H%M%S")
+            ms = int((time.time()%1)*1000)
+            filename = f"bag_{ts}_{ms:03d}.png"
+            path = os.path.join(OUTPUT_DIR, filename)
+            cv2.imwrite(path, frame)  # save the original frame
+            print(f"✅ 已保存: {path}")
+
+    except KeyboardInterrupt:
+        print("\n🛑 用户中断")
+        break
+
+    finally:
+        # Runs on read failure, user interrupt and SystemExit alike.
+        cap.release()
+        cv2.destroyAllWindows()
+        print(f"视频流关闭，共处理 {frame_count} 帧")
+
+rknn.release()
+print("程序结束")
+
--- a/detect_image/image_02_3588.py
+++ b/detect_image/image_02_3588.py
@ -0,0 +1,205 @@
+import cv2
+import time
+import os
+import numpy as np
+from PIL import Image
+from skimage.metrics import structural_similarity as ssim
+from rknnlite.api import RKNNLite
+
+# ================== 配置 ==================
+# Camera stream address.  The default embeds the camera credentials in source;
+# set the RTSP_URL environment variable to supply a different address without
+# editing this file (an unset or empty variable falls back to the default).
+# NOTE(review): consider rotating this password and removing the in-source
+# default once every deployment provides RTSP_URL.
+RTSP_URL = os.environ.get("RTSP_URL") or (
+    "rtsp://admin:XJ123456@192.168.250.60:554/streaming/channels/101"
+)
+RKNN_MODEL = "bag3588.rknn"
+OUTPUT_DIR = "camera_event_capture"
+
+# NOTE(review): CONF_THRESHOLD is not referenced by the rest of this script;
+# the detection gate in filter_boxes compares against OBJ_THRESH instead.
+CONF_THRESHOLD = 0.5
+# While idle, a frame whose SSIM against the last frame that passed the
+# filters exceeds this value is skipped.
+SSIM_THRESHOLD = 0.9
+
+END_MISS_FRAMES = 30        # consecutive frames without a detection that end a capture session
+SAVE_EVERY_N_FRAMES = 1     # while capturing, save one frame out of every N
+SHOW_WINDOW = False         # show the live stream in an OpenCV window
+
+# Gray-frame filter parameters (per-channel value band and pixel-ratio limit)
+GRAY_LOWER = 70
+GRAY_UPPER = 230
+GRAY_RATIO_THRESHOLD = 0.7
+
+IMG_SIZE = (640, 640)       # model input size, unpacked as (width, height) by letterbox_resize
+OBJ_THRESH = 0.001
+NMS_THRESH = 0.45
+CLASS_NAME = ["bag"]
+
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# ================== 灰度判断 ==================
+def is_large_gray(image):
+    """Report whether most pixels of ``image`` fall inside the gray band.
+
+    ``image`` is converted with ``np.array``.  Input that is not an
+    H x W x 3 array is reported as ``True``.  Otherwise a pixel counts when
+    all three of its channel values lie in ``[GRAY_LOWER, GRAY_UPPER]``, and
+    the result says whether the share of such pixels is strictly greater
+    than ``GRAY_RATIO_THRESHOLD``.
+    """
+    pixels = np.array(image)
+    if not (pixels.ndim == 3 and pixels.shape[2] == 3):
+        return True
+    height, width = pixels.shape[:2]
+    # Per-channel band test first, then require it on every channel.
+    in_band = (pixels >= GRAY_LOWER) & (pixels <= GRAY_UPPER)
+    all_channels_in_band = in_band.all(axis=2)
+    return all_channels_in_band.sum() / (height * width) > GRAY_RATIO_THRESHOLD
+
+# ================== RKNN 推理工具 ==================
+def letterbox_resize(image, size, bg_color=114):
+    """Fit ``image`` inside ``size`` keeping its aspect ratio, then pad.
+
+    Args:
+        image: array whose first two dimensions are (height, width).
+        size: target ``(width, height)``.
+        bg_color: fill value for the padding area.
+
+    Returns:
+        ``(canvas, scale, dx, dy)``: the padded uint8 image of shape
+        ``(height, width, 3)``, the resize factor applied, and the left/top
+        offsets at which the resized image was pasted.
+    """
+    out_w, out_h = size
+    src_h, src_w = image.shape[:2]
+    # One factor for both axes so the content is not distorted.
+    scale = min(out_w / src_w, out_h / src_h)
+    fit_w = int(src_w * scale)
+    fit_h = int(src_h * scale)
+    shrunk = cv2.resize(image, (fit_w, fit_h))
+    # Split the leftover space evenly so the content sits in the centre.
+    dx = (out_w - fit_w) // 2
+    dy = (out_h - fit_h) // 2
+    canvas = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)
+    canvas[dy:dy + fit_h, dx:dx + fit_w] = shrunk
+    return canvas, scale, dx, dy
+
+def dfl_numpy(position):
+    """Collapse per-side bin logits into one expected value per box side.
+
+    ``position`` has shape ``(n, c, h, w)``; its channel axis is viewed as
+    4 groups of ``c // 4`` bins.  A softmax is taken over each group's bins
+    and the result is the softmax-weighted mean bin index, shape
+    ``(n, 4, h, w)``.
+
+    The softmax subtracts each group's maximum before exponentiating.  The
+    softmax value is unchanged by that shift, but large logits no longer
+    overflow ``np.exp`` and turn the output into NaN.
+    """
+    n, c, h, w = position.shape
+    p_num = 4
+    mc = c // p_num
+    logits = position.reshape(n, p_num, mc, h, w)
+    shifted = logits - np.max(logits, axis=2, keepdims=True)
+    weights = np.exp(shifted)  # computed once and reused for the normaliser
+    probs = weights / np.sum(weights, axis=2, keepdims=True)
+    bins = np.arange(mc).reshape(1, 1, mc, 1, 1)
+    return np.sum(probs * bins, axis=2)
+
+def box_process(position):
+    """Turn one output branch's distance tensor into corner boxes.
+
+    ``position`` is ``(n, c, grid_h, grid_w)``.  After ``dfl_numpy`` it holds
+    four distances per cell; the first two are subtracted from the cell
+    centre and the last two are added to it, and both corner pairs are
+    scaled from grid units to input pixels.
+
+    Returns:
+        Array of shape ``(n, 4, grid_h, grid_w)`` holding
+        ``(x1, y1, x2, y2)``.
+    """
+    grid_h, grid_w = position.shape[2:4]
+    col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
+    col = col.reshape(1, 1, grid_h, grid_w)
+    row = row.reshape(1, 1, grid_h, grid_w)
+    # Channel 0 is the column (x) index, channel 1 the row (y) index.
+    grid = np.concatenate((col, row), axis=1)
+    # The stride must use the same (x, y) order as the grid.  IMG_SIZE is
+    # (width, height), so x uses IMG_SIZE[0] / grid_w and y uses
+    # IMG_SIZE[1] / grid_h.  The values are the same as before whenever both
+    # strides are equal.
+    stride = np.array(
+        [IMG_SIZE[0] // grid_w, IMG_SIZE[1] // grid_h]
+    ).reshape(1, 2, 1, 1)
+    position = dfl_numpy(position)
+    box_xy = grid + 0.5 - position[:, 0:2, :, :]
+    box_xy2 = grid + 0.5 + position[:, 2:4, :, :]
+    return np.concatenate((box_xy * stride, box_xy2 * stride), axis=1)
+
+def filter_boxes(boxes, box_confidences, box_class_probs):
+    """Tell whether any candidate reaches ``OBJ_THRESH``.
+
+    Each candidate's score is its confidence multiplied by its highest class
+    probability.  The boxes themselves do not influence the result.
+
+    Returns:
+        True when at least one score is >= ``OBJ_THRESH``, otherwise False.
+
+    NOTE(review): the gate is OBJ_THRESH, not CONF_THRESHOLD; confirm that
+    such a low threshold is intended for a presence check.
+    """
+    # Result unused; kept so a box array that cannot be viewed as (-1, 4)
+    # still raises here.
+    boxes = boxes.reshape(-1, 4)
+    confidences = box_confidences.reshape(-1)
+    probs = np.array(box_class_probs)
+
+    best_class = np.argmax(probs, axis=-1)
+    row_index = np.arange(len(best_class))
+    best_prob = probs[row_index, best_class]
+    combined = confidences * best_prob
+
+    passing = combined >= OBJ_THRESH
+    return np.sum(passing) > 0
+
+def post_process(outputs, scale, dx, dy):
+    """Reduce the raw model outputs to a single "bag present" flag.
+
+    ``outputs`` is read as three consecutive triples: index ``3*i`` is passed
+    to ``box_process``, ``3*i + 1`` is used as class probabilities and
+    ``3*i + 2`` as confidences.  Every tensor is moved to channel-last and
+    flattened to rows, the three branches are concatenated, and the result
+    of ``filter_boxes`` is returned.
+
+    ``scale``, ``dx`` and ``dy`` are accepted but not used here.
+    """
+    def _to_rows(tensor):
+        # (n, c, h, w) -> (n*h*w, c)
+        channel_last = tensor.transpose(0, 2, 3, 1)
+        return channel_last.reshape(-1, channel_last.shape[3])
+
+    box_parts, conf_parts, class_parts = [], [], []
+    for base in range(0, 9, 3):
+        box_parts.append(box_process(outputs[base]))
+        conf_parts.append(outputs[base + 2])
+        class_parts.append(outputs[base + 1])
+
+    all_boxes = np.concatenate([_to_rows(part) for part in box_parts])
+    all_conf = np.concatenate([_to_rows(part) for part in conf_parts])
+    all_probs = np.concatenate([_to_rows(part) for part in class_parts])
+    return filter_boxes(all_boxes, all_conf, all_probs)
+
+# ================== RKNN 初始化 ==================
+# Load the model and bring up the runtime; a non-zero status from either call
+# aborts the script.
+rknn = RKNNLite()
+load_status = rknn.load_rknn(RKNN_MODEL)
+if load_status != 0:
+    raise RuntimeError("RKNN 模型加载失败")
+runtime_status = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO)
+if runtime_status != 0:
+    raise RuntimeError("RKNN Runtime 初始化失败")
+print("✅ RKNN 初始化完成")
+
+# ================== Video stream ==================
+# Open the camera stream once; the script stops if it cannot be opened.
+cap = cv2.VideoCapture(RTSP_URL)
+stream_ready = cap.isOpened()
+if not stream_ready:
+    raise RuntimeError("RTSP 连接失败")
+print("🎥 视频流已连接")
+
+# ================== 状态机 ==================
+# Two-state capture machine: IDLE waits for a detection, CAPTURING saves
+# frames until END_MISS_FRAMES consecutive processed frames have no detection.
+STATE_IDLE = 0
+STATE_CAPTURING = 1
+
+# Consecutive failed reads tolerated before the stream is re-opened.
+MAX_READ_FAILURES = 10
+
+state = STATE_IDLE
+miss_count = 0       # consecutive processed frames without a detection
+save_idx = 0         # frames seen in the current session (used in file names)
+saved_count = 0      # frames actually written in the current session
+session_dir = None
+session_id = 0
+last_gray = None     # grayscale copy of the last frame that passed the filters
+frame_count = 0
+read_failures = 0
+
+try:
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            # A dropped stream never recovers on the same handle, so after
+            # enough consecutive failures re-open it instead of polling a
+            # dead connection forever.
+            read_failures += 1
+            time.sleep(0.5)
+            if read_failures >= MAX_READ_FAILURES:
+                print("⚠️ 视频流读取失败，尝试重连")
+                cap.release()
+                cap = cv2.VideoCapture(RTSP_URL)
+                read_failures = 0
+            continue
+        read_failures = 0
+
+        frame_count += 1
+
+        if SHOW_WINDOW:
+            cv2.imshow("Camera", frame)
+            if cv2.waitKey(1) == ord('q'):
+                break
+
+        # ---------- Gray-frame filter ----------
+        pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+        if is_large_gray(pil_image):
+            continue
+
+        # ---------- SSIM de-duplication (only while idle) ----------
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        if last_gray is not None and state == STATE_IDLE:
+            sim = ssim(gray, last_gray)
+            if sim > SSIM_THRESHOLD:
+                continue
+        last_gray = gray.copy()
+
+        # ---------- RKNN inference: is a bag present? ----------
+        img_resized, scale, dx, dy = letterbox_resize(frame, IMG_SIZE)
+        input_data = np.expand_dims(img_resized, 0)
+        outputs = rknn.inference(inputs=[input_data])
+        has_bag = post_process(outputs, scale, dx, dy)
+
+        # ---------- State machine ----------
+        if state == STATE_IDLE:
+            if has_bag:
+                session_id += 1
+                ts = time.strftime("%Y%m%d_%H%M%S")
+                session_dir = os.path.join(OUTPUT_DIR, f"session_{session_id:04d}_{ts}")
+                os.makedirs(session_dir, exist_ok=True)
+                print(f"\n🚀 进入采集")
+                state = STATE_CAPTURING
+                miss_count = 0
+                save_idx = 0
+                saved_count = 0
+
+        elif state == STATE_CAPTURING:
+            if has_bag:
+                miss_count = 0
+            else:
+                miss_count += 1
+
+            if save_idx % SAVE_EVERY_N_FRAMES == 0:
+                ts = time.strftime("%Y%m%d_%H%M%S")
+                ms = int((time.time() % 1) * 1000)
+                fname = f"{save_idx:06d}_{ts}_{ms:03d}.png"
+                # Save the original frame and surface a failed write instead
+                # of silently losing it.
+                if cv2.imwrite(os.path.join(session_dir, fname), frame):
+                    saved_count += 1
+                else:
+                    print(f"❌ 保存失败: {fname}")
+            save_idx += 1
+
+            if miss_count >= END_MISS_FRAMES:
+                # Report frames written, not frames seen; the two differ when
+                # SAVE_EVERY_N_FRAMES > 1 or a write fails.
+                print(f"🛑 退出采集，本次保存 {saved_count} 帧")
+                state = STATE_IDLE
+                miss_count = 0
+                session_dir = None
+
+except KeyboardInterrupt:
+    print("\n🛑 用户退出")
+
+finally:
+    cap.release()
+    cv2.destroyAllWindows()
+    rknn.release()
+    print("程序结束")
+