更新加入料带目标检测，判断料带到位，以及控制滚筒逻辑

2025-12-30 17:29:49 +08:00
parent d6918e90f2
commit 2028a96819
27 changed files with 1499 additions and 1224 deletions
--- a/ailai_pc/image_capture_detect/bag3568.rknn
+++ b/ailai_pc/image_capture_detect/bag3568.rknn
--- a/ailai_pc/image_capture_detect/bag3588.rknn
+++ b/ailai_pc/image_capture_detect/bag3588.rknn
--- a/ailai_pc/image_capture_detect/bag_detect.py
+++ b/ailai_pc/image_capture_detect/bag_detect.py
@ -0,0 +1,180 @@
+import os
+import cv2
+import numpy as np
+from rknnlite.api import RKNNLite
+
+# ====================== Configuration ======================
+MODEL_PATH = "bag3588.rknn"
+IMG_PATH = "1.jpg"
+IMG_SIZE = (640, 640)   # passed to letterbox_resize, which unpacks it as (width, height)
+
+OBJ_THRESH = 0.25       # minimum objectness * class_prob for a cell to be kept
+NMS_THRESH = 0.45       # IoU above which a lower-scored box is suppressed
+
+CLASS_NAME = ["bag", "bag35"]
+
+OUTPUT_DIR = "./result"
+# Created at import time so the visualisation can be written without further checks.
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+VISUALIZE = True   # False = only report class and confidence, do not save an image
+
+# ====================== 工具函数 ======================
+def softmax(x, axis=-1):
+    """Return the softmax of ``x`` along ``axis``.
+
+    The per-slice maximum is subtracted before exponentiating so that large
+    inputs do not overflow; the shift does not change the result.
+    """
+    shifted = x - np.max(x, axis=axis, keepdims=True)
+    weights = np.exp(shifted)
+    total = np.sum(weights, axis=axis, keepdims=True)
+    return weights / total
+
+def letterbox_resize(image, size, bg_color=114):
+    """Resize ``image`` to fit inside ``size`` keeping its aspect ratio, then pad.
+
+    Args:
+        image: Array whose first two dimensions are (height, width).
+        size: Target ``(width, height)``.
+        bg_color: Fill value used for the padding border.
+
+    Returns:
+        ``(canvas, scale, dx, dy)``: the padded image, the resize factor, and
+        the left/top offsets of the resized image inside the canvas.
+    """
+    target_w, target_h = size
+    src_h, src_w = image.shape[:2]
+
+    # The smaller of the two ratios guarantees both sides fit in the target.
+    scale = min(target_w / src_w, target_h / src_h)
+    new_w = int(src_w * scale)
+    new_h = int(src_h * scale)
+    resized = cv2.resize(image, (new_w, new_h))
+
+    # Centre the resized image; whatever is left over stays at bg_color.
+    dx = (target_w - new_w) // 2
+    dy = (target_h - new_h) // 2
+    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
+    canvas[dy:dy + new_h, dx:dx + new_w] = resized
+
+    return canvas, scale, dx, dy
+
+# ====================== DFL 解码 ======================
+def dfl_decode(reg):
+    """Decode one cell's DFL regression vector into four distances.
+
+    ``reg`` is reshaped into four rows of bins; each row is turned into a
+    probability distribution with softmax and reduced to its expected bin
+    index. The result is an array of four values, which the caller unpacks
+    as left, top, right, bottom.
+    """
+    bins = reg.reshape(4, -1)
+    bin_index = np.arange(bins.shape[1])
+    weights = softmax(bins, axis=1)
+    return (weights * bin_index).sum(axis=1)
+
+# ====================== NMS ======================
+def nms(boxes, scores, thresh):
+    """Greedy non-maximum suppression.
+
+    Args:
+        boxes: Sequence of ``[x1, y1, x2, y2]`` boxes.
+        scores: One score per box.
+        thresh: A box is discarded when its IoU with an already-kept box is
+            above this value.
+
+    Returns:
+        List of indices into ``boxes`` that survive, highest score first.
+    """
+    boxes = np.array(boxes)
+    scores = np.array(scores)
+
+    left, top, right, bottom = boxes.T
+    areas = (right - left) * (bottom - top)
+
+    # Candidate indices, best score first.
+    remaining = scores.argsort()[::-1]
+    keep = []
+
+    while remaining.size > 0:
+        best, rest = remaining[0], remaining[1:]
+        keep.append(best)
+
+        # Intersection rectangle of the best box with every other candidate.
+        inter_w = np.maximum(
+            0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest])
+        )
+        inter_h = np.maximum(
+            0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest])
+        )
+        inter = inter_w * inter_h
+        iou = inter / (areas[best] + areas[rest] - inter)
+
+        # Only candidates that do not overlap the kept box too much stay in play.
+        remaining = rest[iou <= thresh]
+
+    return keep
+
+# ====================== 后处理 ======================
+def post_process(outputs, scale, dx, dy):
+    """Decode raw model outputs into boxes in original-image coordinates.
+
+    ``outputs`` is read as three consecutive ``(reg, cls, obj)`` groups, one
+    per stride in ``[8, 16, 32]``; index 0 of every tensor is used.
+
+    Args:
+        outputs: Sequence of model output arrays laid out as described above.
+        scale: Resize factor returned by ``letterbox_resize``.
+        dx: Horizontal padding offset returned by ``letterbox_resize``.
+        dy: Vertical padding offset returned by ``letterbox_resize``.
+
+    Returns:
+        ``(boxes, classes, scores)`` arrays after NMS, with boxes mapped back
+        to the un-letterboxed image, or ``(None, None, None)`` when no cell
+        reaches ``OBJ_THRESH``.
+    """
+    boxes_all, scores_all, classes_all = [], [], []
+
+    strides = [8, 16, 32]
+
+    for i, stride in enumerate(strides):
+        reg = outputs[i * 3 + 0][0]
+        cls = outputs[i * 3 + 1][0]
+        obj = outputs[i * 3 + 2][0]
+
+        # Score the whole grid at once instead of visiting every cell in
+        # Python; only the few cells that pass the threshold need decoding.
+        cls_id_map = np.argmax(cls, axis=0)
+        cls_score_map = np.max(cls, axis=0)
+        final_map = cls_score_map * obj[0]
+
+        # ``~(score < thresh)`` mirrors the "skip when below threshold" rule.
+        # argwhere yields (h, w) pairs in row-major order, the same order a
+        # nested h/w loop would visit them in.
+        for h, w in np.argwhere(~(final_map < OBJ_THRESH)):
+            l, t, r, b = dfl_decode(reg[:, h, w])
+
+            # Cell centre in letterboxed-image pixels.
+            cx = (w + 0.5) * stride
+            cy = (h + 0.5) * stride
+
+            x1 = cx - l * stride
+            y1 = cy - t * stride
+            x2 = cx + r * stride
+            y2 = cy + b * stride
+
+            boxes_all.append([x1, y1, x2, y2])
+            scores_all.append(final_map[h, w])
+            classes_all.append(int(cls_id_map[h, w]))
+
+    if not boxes_all:
+        return None, None, None
+
+    keep = nms(boxes_all, scores_all, NMS_THRESH)
+
+    boxes = np.array(boxes_all)[keep]
+    scores = np.array(scores_all)[keep]
+    classes = np.array(classes_all)[keep]
+
+    # Undo the letterbox: remove the padding offset, then the resize factor.
+    boxes[:, [0, 2]] = (boxes[:, [0, 2]] - dx) / scale
+    boxes[:, [1, 3]] = (boxes[:, [1, 3]] - dy) / scale
+
+    return boxes, classes, scores
+
+# ====================== 推理入口 ======================
+def detect_single_image(img_path, visualize=True):
+    """Run the model on one image file and report the best detection.
+
+    Args:
+        img_path: Path of the image to read with ``cv2.imread``.
+        visualize: When true, draw the best box on the image and save it
+            under ``OUTPUT_DIR`` using the input file's base name.
+
+    Returns:
+        ``(class_name, score)`` of the highest-scoring detection, or
+        ``(None, None)`` when nothing is detected.
+
+    Raises:
+        FileNotFoundError: If the image cannot be read.
+        RuntimeError: If the model cannot be loaded or the runtime cannot be
+            initialised (non-zero return code).
+    """
+    # Read the image before creating the runtime so that a bad path cannot
+    # leave an unreleased RKNN handle behind.
+    img = cv2.imread(img_path)
+    if img is None:
+        raise FileNotFoundError(img_path)
+
+    rknn = RKNNLite()
+    try:
+        if rknn.load_rknn(MODEL_PATH) != 0:
+            raise RuntimeError("RKNN 模型加载失败")
+        if rknn.init_runtime() != 0:
+            raise RuntimeError("RKNN Runtime 初始化失败")
+
+        img_r, scale, dx, dy = letterbox_resize(img, IMG_SIZE)
+        outputs = rknn.inference([np.expand_dims(img_r, 0)])
+
+        boxes, cls_ids, scores = post_process(outputs, scale, dx, dy)
+
+        if boxes is None or len(scores) == 0:
+            print("未检测到目标")
+            return None, None
+
+        best_idx = int(np.argmax(scores))
+        best_score = float(scores[best_idx])
+        best_cls_id = int(cls_ids[best_idx])
+        best_cls_name = CLASS_NAME[best_cls_id]
+        best_box = boxes[best_idx]
+
+        # ======== Visualisation (optional) ========
+        if visualize:
+            x1, y1, x2, y2 = best_box.astype(int)
+            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
+            cv2.putText(
+                img,
+                f"{best_cls_name}:{best_score:.3f}",
+                (x1, max(y1 - 5, 0)),  # keep the label inside the image
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.6,
+                (0, 255, 0),
+                2
+            )
+
+            save_path = os.path.join(OUTPUT_DIR, os.path.basename(img_path))
+            cv2.imwrite(save_path, img)
+            print("可视化结果已保存:", save_path)
+
+        return best_cls_name, best_score
+    finally:
+        # Single release point for every exit path, including exceptions.
+        rknn.release()
+
+# ====================== 主入口 ======================
+if __name__ == "__main__":
+    best_cls_name, best_score = detect_single_image(IMG_PATH, visualize=VISUALIZE)
+    # detect_single_image returns (None, None) when nothing is detected and
+    # has already printed a notice; formatting None with ".4f" would raise
+    # TypeError, so only print the summary for a real detection.
+    if best_cls_name is not None:
+        print(f"类别: {best_cls_name}, 置信度: {best_score:.4f}")
+
--- a/ailai_pc/image_capture_detect/capture-image_1.py
+++ b/ailai_pc/image_capture_detect/capture-image_1.py
@ -0,0 +1,223 @@
+import cv2
+import time
+import os
+import numpy as np
+from PIL import Image
+from skimage.metrics import structural_similarity as ssim
+import shutil
+from rknnlite.api import RKNNLite
+
+# ================== Configuration ==================
+# NOTE(review): the default URL embeds the camera credentials in source
+# control. Set the RTSP_URL environment variable to override it without
+# editing this file; the default is kept so existing deployments still run.
+RTSP_URL = os.environ.get(
+    "RTSP_URL",
+    "rtsp://admin:ailaimiye123@192.168.0.125:554/streaming/channels/101",
+)
+SAVE_INTERVAL = 15            # process every 15th frame read from the stream
+SSIM_THRESHOLD = 0.9          # frames more similar than this to the last one are skipped
+OUTPUT_DIR = "camera_test"
+MODEL_PATH = "bag3568.rknn"
+SHOW_WINDOW = False
+
+# Per-channel band and pixel share used by is_large_gray.
+GRAY_LOWER = 70
+GRAY_UPPER = 230
+GRAY_RATIO_THRESHOLD = 0.7
+
+IMG_SIZE = (640, 640)         # passed to letterbox_resize as (width, height)
+OBJ_THRESH = 0.25             # minimum objectness * class_prob for a cell to be kept
+NMS_THRESH = 0.45             # IoU above which a lower-scored box is suppressed
+CLASS_NAME = ["bag"]
+
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# ================== 基础工具 ==================
+def softmax(x, axis=-1):
+    """Softmax along ``axis``, shifted by the slice maximum to avoid overflow."""
+    stabilised = x - np.max(x, axis=axis, keepdims=True)
+    exponentials = np.exp(stabilised)
+    normaliser = np.sum(exponentials, axis=axis, keepdims=True)
+    return exponentials / normaliser
+
+def letterbox_resize(image, size, bg_color=114):
+    """Fit ``image`` inside ``size`` (width, height) and pad the remainder.
+
+    Returns ``(canvas, scale, dx, dy)``: the padded image, the resize factor
+    applied to the source, and the left/top offsets of the resized image
+    inside the canvas.
+    """
+    target_w, target_h = size
+    src_h, src_w = image.shape[:2]
+
+    # Use the tighter ratio so neither side exceeds the target.
+    scale = min(target_w / src_w, target_h / src_h)
+    new_w = int(src_w * scale)
+    new_h = int(src_h * scale)
+    resized = cv2.resize(image, (new_w, new_h))
+
+    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
+    # Centre the resized image on the canvas.
+    dx = (target_w - new_w) // 2
+    dy = (target_h - new_h) // 2
+    canvas[dy:dy + new_h, dx:dx + new_w] = resized
+    return canvas, scale, dx, dy
+
+# ================== DFL ==================
+def dfl_decode(reg):
+    """Turn one cell's DFL vector into four expected bin indices.
+
+    The vector is split into four rows of bins, each row is passed through
+    softmax, and the probability-weighted bin index of each row is returned.
+    """
+    bins = reg.reshape(4, -1)
+    bin_index = np.arange(bins.shape[1])
+    weights = softmax(bins, axis=1)
+    return (weights * bin_index).sum(axis=1)
+
+# ================== NMS ==================
+def nms(boxes, scores, thresh):
+    """Greedy non-maximum suppression over ``[x1, y1, x2, y2]`` boxes.
+
+    Returns the indices of the boxes that are kept, highest score first. A
+    box is dropped when its IoU with an already-kept box exceeds ``thresh``.
+    """
+    boxes = np.array(boxes)
+    scores = np.array(scores)
+
+    left, top, right, bottom = boxes.T
+    areas = (right - left) * (bottom - top)
+
+    # Candidate indices sorted by descending score.
+    remaining = scores.argsort()[::-1]
+    keep = []
+
+    while remaining.size > 0:
+        best, rest = remaining[0], remaining[1:]
+        keep.append(best)
+
+        # Overlap of the best box with each remaining candidate.
+        inter_w = np.maximum(
+            0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest])
+        )
+        inter_h = np.maximum(
+            0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest])
+        )
+        inter = inter_w * inter_h
+        iou = inter / (areas[best] + areas[rest] - inter)
+
+        remaining = rest[iou <= thresh]
+
+    return keep
+
+# ================== 后处理 ==================
+def post_process(outputs, scale, dx, dy):
+    """Decode raw model outputs into boxes in original-image coordinates.
+
+    ``outputs`` is read as three consecutive ``(reg, cls, obj)`` groups, one
+    per stride in ``[8, 16, 32]``; index 0 of every tensor is used.
+
+    Args:
+        outputs: Sequence of model output arrays laid out as described above.
+        scale: Resize factor returned by ``letterbox_resize``.
+        dx: Horizontal padding offset returned by ``letterbox_resize``.
+        dy: Vertical padding offset returned by ``letterbox_resize``.
+
+    Returns:
+        ``(boxes, classes, scores)`` arrays after NMS, with boxes mapped back
+        to the un-letterboxed frame, or ``(None, None, None)`` when no cell
+        reaches ``OBJ_THRESH``.
+    """
+    boxes_all, scores_all, classes_all = [], [], []
+    strides = [8, 16, 32]
+
+    for i, stride in enumerate(strides):
+        reg = outputs[i*3 + 0][0]
+        cls = outputs[i*3 + 1][0]
+        obj = outputs[i*3 + 2][0]
+
+        # Score the whole grid at once instead of visiting every cell in
+        # Python; only the few cells that pass the threshold need decoding.
+        cls_id_map = np.argmax(cls, axis=0)
+        final_map = np.max(cls, axis=0) * obj[0]
+
+        # ``~(score < thresh)`` mirrors the "skip when below threshold" rule.
+        # argwhere yields (h, w) pairs in row-major order, the same order a
+        # nested h/w loop would visit them in.
+        for h, w in np.argwhere(~(final_map < OBJ_THRESH)):
+            l, t, r, b = dfl_decode(reg[:, h, w])
+            # Cell centre in letterboxed-image pixels.
+            cx = (w + 0.5) * stride
+            cy = (h + 0.5) * stride
+
+            x1 = cx - l * stride
+            y1 = cy - t * stride
+            x2 = cx + r * stride
+            y2 = cy + b * stride
+
+            boxes_all.append([x1, y1, x2, y2])
+            scores_all.append(final_map[h, w])
+            classes_all.append(cls_id_map[h, w])
+
+    if not boxes_all:
+        return None, None, None
+
+    keep = nms(boxes_all, scores_all, NMS_THRESH)
+    boxes = np.array(boxes_all)[keep]
+    scores = np.array(scores_all)[keep]
+    classes = np.array(classes_all)[keep]
+
+    # Undo the letterbox: remove the padding offset, then the resize factor.
+    boxes[:, [0, 2]] = (boxes[:, [0, 2]] - dx) / scale
+    boxes[:, [1, 3]] = (boxes[:, [1, 3]] - dy) / scale
+
+    return boxes, classes, scores
+
+# ================== 灰度判断 ==================
+def is_large_gray(image):
+    """Return True when most pixels have all three channels in the gray band.
+
+    A pixel counts when every channel lies within
+    ``[GRAY_LOWER, GRAY_UPPER]``; the image is flagged when the share of such
+    pixels exceeds ``GRAY_RATIO_THRESHOLD``. Anything that is not an
+    H x W x 3 array is also reported as True.
+    """
+    pixels = np.array(image)
+    if pixels.ndim != 3 or pixels.shape[2] != 3:
+        return True
+
+    height, width = pixels.shape[:2]
+    # One comparison over the channel axis replaces three per-channel masks.
+    in_band = (pixels >= GRAY_LOWER) & (pixels <= GRAY_UPPER)
+    gray_mask = in_band.all(axis=2)
+    return gray_mask.sum() / (height * width) > GRAY_RATIO_THRESHOLD
+
+# ================== RKNN 初始化 ==================
+rknn = RKNNLite()
+
+# Both calls report success with a zero return code; stop on the first failure.
+load_status = rknn.load_rknn(MODEL_PATH)
+if load_status != 0:
+    raise RuntimeError("❌ RKNN 模型加载失败")
+
+runtime_status = rknn.init_runtime()
+if runtime_status != 0:
+    raise RuntimeError("❌ RKNN Runtime 初始化失败")
+
+print("✅ RKNN 初始化完成")
+
+# ================== 视频流处理 ==================
+# Capture loop: connect to the RTSP stream, sample every SAVE_INTERVAL-th
+# frame, drop near-duplicates via SSIM, and save frames in which the model
+# finds at least one box. The outer loop reconnects when the stream drops.
+last_gray = None
+frame_count = 0
+
+try:
+    while True:
+        cap = cv2.VideoCapture(RTSP_URL)
+        if not cap.isOpened():
+            print("❌ 无法连接 RTSP")
+            # Release the failed handle before retrying so handles do not pile up.
+            cap.release()
+            time.sleep(1)
+            continue
+
+        print("✅ 开始读取视频流")
+
+        try:
+            while True:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+
+                frame_count += 1
+
+                if SHOW_WINDOW:
+                    cv2.imshow("Camera", frame)
+                    if cv2.waitKey(1) == ord('q'):
+                        raise KeyboardInterrupt
+
+                if frame_count % SAVE_INTERVAL != 0:
+                    continue
+
+                print(f"处理帧 {frame_count}")
+
+                # STEP1 gray filter (can be enabled)
+                # if is_large_gray(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))):
+                #     print("跳过：大面积灰色")
+                #     continue
+
+                # STEP2 SSIM de-duplication against the last processed frame
+                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+                if last_gray is not None:
+                    sim = ssim(gray, last_gray)
+                    if sim > SSIM_THRESHOLD:
+                        print(f"跳过：SSIM={sim:.3f}")
+                        continue
+                last_gray = gray.copy()
+
+                # STEP3 RKNN inference
+                img_r, scale, dx, dy = letterbox_resize(frame, IMG_SIZE)
+                outputs = rknn.inference([np.expand_dims(img_r, 0)])
+                boxes, cls_ids, scores = post_process(outputs, scale, dx, dy)
+
+                if boxes is None or len(boxes) == 0:
+                    print("跳过：未检测到 bag")
+                    continue
+
+                # STEP4 disk check: stop when less than 5 GiB is free
+                _, _, free = shutil.disk_usage(OUTPUT_DIR)
+                if free < 5 * 1024**3:
+                    raise SystemExit("❌ 磁盘空间不足")
+
+                # STEP5 save, with a millisecond suffix to keep names unique
+                ts = time.strftime("%Y%m%d_%H%M%S")
+                ms = int((time.time() % 1) * 1000)
+                path = os.path.join(OUTPUT_DIR, f"bag_{ts}_{ms:03d}.png")
+                cv2.imwrite(path, frame)
+                print(f"✅ 已保存: {path}")
+        finally:
+            cap.release()
+            cv2.destroyAllWindows()
+
+        # The read loop only falls through to here when a frame could not be
+        # read; pause so a dead stream is not re-dialled in a tight loop.
+        time.sleep(1)
+
+except KeyboardInterrupt:
+    # Covers Ctrl-C anywhere in the loop as well as the 'q' key above.
+    print("🛑 用户中断")
+finally:
+    # Runs on every exit path, including the SystemExit raised on low disk.
+    rknn.release()
+
+print("程序结束")
+
--- a/ailai_pc/image_capture_detect/capture-image_2.py
+++ b/ailai_pc/image_capture_detect/capture-image_2.py
@ -0,0 +1,213 @@
+import cv2
+import time
+import os
+import numpy as np
+from PIL import Image
+from skimage.metrics import structural_similarity as ssim
+from rknnlite.api import RKNNLite
+import shutil
+
+# ================== Configuration ==================
+# NOTE(review): the default URL embeds the camera credentials in source
+# control. Set the RTSP_URL environment variable to override it without
+# editing this file; the default is kept so existing deployments still run.
+RTSP_URL = os.environ.get(
+    "RTSP_URL",
+    "rtsp://admin:ailaimiye123@192.168.0.125:554/streaming/channels/101",
+)
+RKNN_MODEL = "bag3568.rknn"
+OUTPUT_DIR = "camera_event_capture"
+
+CONF_THRESHOLD = 0.25          # final bag confidence threshold (obj * class_prob)
+SSIM_THRESHOLD = 0.9           # idle frames more similar than this are skipped
+
+END_MISS_FRAMES = 30           # consecutive frames without a bag that end a session
+SAVE_EVERY_N_FRAMES = 1
+SHOW_WINDOW = False
+
+IMG_SIZE = (640, 640)          # passed to letterbox_resize as (width, height)
+CLASS_NAME = ["bag"]
+NMS_THRESH = 0.45              # IoU above which a lower-scored box is suppressed
+
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# ================== 基础工具 ==================
+def softmax(x, axis=-1):
+    """Numerically stable softmax of ``x`` along ``axis``."""
+    # Shifting by the maximum keeps np.exp from overflowing.
+    centred = x - np.max(x, axis=axis, keepdims=True)
+    numerators = np.exp(centred)
+    denominators = np.sum(numerators, axis=axis, keepdims=True)
+    return numerators / denominators
+
+def letterbox_resize(image, size, bg_color=114):
+    """Scale ``image`` to fit ``size`` (width, height) and centre it on a padded canvas.
+
+    Returns ``(canvas, scale, dx, dy)`` where ``scale`` is the resize factor
+    and ``dx``/``dy`` are the left/top padding offsets.
+    """
+    target_w, target_h = size
+    src_h, src_w = image.shape[:2]
+
+    # The tighter ratio keeps the whole image inside the target.
+    scale = min(target_w / src_w, target_h / src_h)
+    new_w = int(src_w * scale)
+    new_h = int(src_h * scale)
+    resized = cv2.resize(image, (new_w, new_h))
+
+    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
+    dx = (target_w - new_w) // 2
+    dy = (target_h - new_h) // 2
+    canvas[dy:dy + new_h, dx:dx + new_w] = resized
+    return canvas, scale, dx, dy
+
+def dfl_decode(reg):
+    """Reduce one cell's DFL vector to four expected bin indices.
+
+    The vector is viewed as four rows of bins; each row becomes a softmax
+    distribution whose mean bin index is returned.
+    """
+    bins = reg.reshape(4, -1)
+    bin_index = np.arange(bins.shape[1])
+    weights = softmax(bins, axis=1)
+    return (weights * bin_index).sum(axis=1)
+
+def nms(boxes, scores, thresh):
+    """Greedy non-maximum suppression over ``[x1, y1, x2, y2]`` boxes.
+
+    Returns the kept indices, highest score first. A box is dropped when its
+    IoU with an already-kept box exceeds ``thresh``.
+    """
+    boxes = np.array(boxes)
+    scores = np.array(scores)
+    left, top, right, bottom = boxes.T
+    areas = (right - left) * (bottom - top)
+
+    # Candidate indices sorted by descending score.
+    remaining = scores.argsort()[::-1]
+    keep = []
+    while remaining.size > 0:
+        best, rest = remaining[0], remaining[1:]
+        keep.append(best)
+
+        # Overlap of the best box with each remaining candidate.
+        inter_w = np.maximum(
+            0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest])
+        )
+        inter_h = np.maximum(
+            0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest])
+        )
+        inter = inter_w * inter_h
+        iou = inter / (areas[best] + areas[rest] - inter)
+        remaining = rest[iou <= thresh]
+    return keep
+
+def post_process(outputs, scale, dx, dy):
+    """Decode raw model outputs into boxes in original-image coordinates.
+
+    ``outputs`` is read as three consecutive ``(reg, cls, obj)`` groups, one
+    per stride in ``[8, 16, 32]``; index 0 of every tensor is used.
+
+    Args:
+        outputs: Sequence of model output arrays laid out as described above.
+        scale: Resize factor returned by ``letterbox_resize``.
+        dx: Horizontal padding offset returned by ``letterbox_resize``.
+        dy: Vertical padding offset returned by ``letterbox_resize``.
+
+    Returns:
+        ``(boxes, classes, scores)`` arrays after NMS, with boxes mapped back
+        to the un-letterboxed frame, or ``(None, None, None)`` when no cell
+        reaches ``CONF_THRESHOLD``.
+    """
+    boxes_all, scores_all, classes_all = [], [], []
+    strides = [8, 16, 32]
+
+    for i, stride in enumerate(strides):
+        reg = outputs[i*3 + 0][0]
+        cls = outputs[i*3 + 1][0]
+        obj = outputs[i*3 + 2][0]
+
+        # Score the whole grid at once instead of visiting every cell in
+        # Python; only the few cells that pass the threshold need decoding.
+        cls_id_map = np.argmax(cls, axis=0)
+        final_map = np.max(cls, axis=0) * obj[0]
+
+        # ``~(score < thresh)`` mirrors the "skip when below threshold" rule.
+        # argwhere yields (h, w) pairs in row-major order, the same order a
+        # nested h/w loop would visit them in.
+        for h, w in np.argwhere(~(final_map < CONF_THRESHOLD)):
+            l, t, r, b = dfl_decode(reg[:, h, w])
+            # Cell centre in letterboxed-image pixels.
+            cx = (w + 0.5) * stride
+            cy = (h + 0.5) * stride
+            x1 = cx - l * stride
+            y1 = cy - t * stride
+            x2 = cx + r * stride
+            y2 = cy + b * stride
+            boxes_all.append([x1, y1, x2, y2])
+            scores_all.append(final_map[h, w])
+            classes_all.append(cls_id_map[h, w])
+
+    if not boxes_all:
+        return None, None, None
+
+    keep = nms(boxes_all, scores_all, NMS_THRESH)
+    boxes = np.array(boxes_all)[keep]
+    scores = np.array(scores_all)[keep]
+    classes = np.array(classes_all)[keep]
+    # Undo the letterbox: remove the padding offset, then the resize factor.
+    boxes[:, [0,2]] = (boxes[:, [0,2]] - dx) / scale
+    boxes[:, [1,3]] = (boxes[:, [1,3]] - dy) / scale
+    return boxes, classes, scores
+
+# ================== 灰度判断 ==================
+def is_large_gray(image, gray_ratio_thresh=0.9):
+    """Return whether at least ``gray_ratio_thresh`` of the pixels are near-neutral.
+
+    A pixel's grayness is ``1 - (max channel - min channel) / 255``; pixels
+    scoring 0.9 or more are counted. Anything that is not an H x W x 3 array
+    is reported as True.
+    """
+    pixels = np.array(image).astype(np.float32)
+    if pixels.ndim != 3 or pixels.shape[2] != 3:
+        return True
+
+    # Channel spread is 0 for a perfectly neutral pixel; channel order is irrelevant.
+    spread = pixels.max(axis=2) - pixels.min(axis=2)
+    grayness = 1.0 - spread / 255.0
+    gray_count = np.sum(grayness >= 0.9)
+    pixel_count = pixels.shape[0] * pixels.shape[1]
+    return (gray_count / pixel_count) >= gray_ratio_thresh
+
+# ================== RKNN 初始化 ==================
+rknn = RKNNLite()
+# Explicit checks instead of ``assert``: under ``python -O`` assert statements
+# are removed, which would silently drop the load/init calls themselves.
+if rknn.load_rknn(RKNN_MODEL) != 0:
+    raise RuntimeError("RKNN 模型加载失败")
+if rknn.init_runtime() != 0:
+    raise RuntimeError("RKNN Runtime 初始化失败")
+print("✅ RKNN 初始化完成")
+
+# ================== 视频流 & 状态机 ==================
+# Event capture: stay idle until a bag is detected, then save every
+# SAVE_EVERY_N_FRAMES-th frame into a per-session directory until
+# END_MISS_FRAMES consecutive frames contain no bag.
+cap = cv2.VideoCapture(RTSP_URL)
+if not cap.isOpened():
+    # Raise instead of assert so the check survives ``python -O``; release
+    # what has been acquired so far before bailing out.
+    cap.release()
+    rknn.release()
+    raise RuntimeError("RTSP 连接失败")
+print("🎥 视频流已连接")
+
+STATE_IDLE = 0
+STATE_CAPTURING = 1
+
+# Consecutive failed reads (0.2 s apart) tolerated before reopening the stream.
+MAX_READ_FAILURES = 25
+
+state = STATE_IDLE
+miss_count = 0
+save_idx = 0
+session_dir = None
+session_id = 0
+last_gray = None
+frame_count = 0
+read_failures = 0
+
+try:
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            read_failures += 1
+            if read_failures >= MAX_READ_FAILURES:
+                # Retrying the same handle forever would stall the program
+                # if the stream has dropped, so reopen it.
+                cap.release()
+                cap = cv2.VideoCapture(RTSP_URL)
+                read_failures = 0
+            time.sleep(0.2)
+            continue
+        read_failures = 0
+        frame_count += 1
+
+        if SHOW_WINDOW:
+            cv2.imshow("Camera", frame)
+            if cv2.waitKey(1) == ord('q'):
+                break
+
+        # ---------- gray filter (disabled) ----------
+        #pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+        #if is_large_gray(pil_img):
+        #    continue
+
+        # ---------- SSIM ----------
+        # Near-duplicate frames are only skipped while idle; during a capture
+        # session every frame goes through detection.
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        if last_gray is not None and state == STATE_IDLE:
+            if ssim(gray, last_gray) > SSIM_THRESHOLD:
+                continue
+        last_gray = gray.copy()
+
+        # ---------- RKNN inference ----------
+        img_r, scale, dx, dy = letterbox_resize(frame, IMG_SIZE)
+        outputs = rknn.inference([np.expand_dims(img_r, 0)])
+        boxes, cls_ids, scores = post_process(outputs, scale, dx, dy)
+        has_bag = boxes is not None and len(boxes) > 0
+
+        # ---------- state machine ----------
+        if state == STATE_IDLE:
+            if has_bag:
+                session_id += 1
+                ts = time.strftime("%Y%m%d_%H%M%S")
+                session_dir = os.path.join(OUTPUT_DIR, f"session_{session_id:04d}_{ts}")
+                os.makedirs(session_dir, exist_ok=True)
+                print("🚀 进入采集")
+                state = STATE_CAPTURING
+                miss_count = 0
+                save_idx = 0
+        else:
+            if has_bag:
+                miss_count = 0
+            else:
+                miss_count += 1
+
+            if save_idx % SAVE_EVERY_N_FRAMES == 0:
+                fname = f"{save_idx:06d}.png"
+                cv2.imwrite(os.path.join(session_dir, fname), frame)
+            save_idx += 1
+
+            if miss_count >= END_MISS_FRAMES:
+                print(f"🛑 退出采集，保存 {save_idx} 帧")
+                state = STATE_IDLE
+                miss_count = 0
+                session_dir = None
+
+except KeyboardInterrupt:
+    print("\n🛑 用户退出")
+
+finally:
+    cap.release()
+    cv2.destroyAllWindows()
+    rknn.release()
+    print("程序结束")
+
--- a/ailai_pc/image_capture_detect/image_01_3588.py
+++ b/ailai_pc/image_capture_detect/image_01_3588.py
@ -0,0 +1,171 @@
+import cv2
+import time
+import os
+import numpy as np
+from PIL import Image
+from skimage.metrics import structural_similarity as ssim
+import shutil
+from rknnlite.api import RKNNLite
+
+# ================== Configuration ==================
+# NOTE(review): the default URL embeds the camera credentials in source
+# control. Set the RTSP_URL environment variable to override it without
+# editing this file; the default is kept so existing deployments still run.
+RTSP_URL = os.environ.get(
+    "RTSP_URL",
+    "rtsp://admin:XJ123456@192.168.250.60:554/streaming/channels/101",
+)
+RKNN_MODEL = "bag3588.rknn"
+SAVE_INTERVAL = 15            # process every 15th frame read from the stream
+SSIM_THRESHOLD = 0.9          # frames more similar than this to the last one are skipped
+OUTPUT_DIR = "camera_test"
+SHOW_WINDOW = False
+
+# Per-channel band and pixel share used by is_large_gray.
+GRAY_LOWER = 70
+GRAY_UPPER = 230
+GRAY_RATIO_THRESHOLD = 0.7
+
+IMG_SIZE = (640, 640)         # passed to letterbox_resize as (width, height)
+OBJ_THRESH = 0.25             # class-score threshold used by has_bag_from_outputs
+CLASS_NAME = ["bag"]
+
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# ================== 灰度判断 ==================
+def is_large_gray(image):
+    """Return True when most pixels have all three channels in the gray band.
+
+    A pixel counts when every channel lies within
+    ``[GRAY_LOWER, GRAY_UPPER]``; the image is flagged when the share of such
+    pixels exceeds ``GRAY_RATIO_THRESHOLD``. Anything that is not an
+    H x W x 3 array is also reported as True.
+    """
+    pixels = np.array(image)
+    if pixels.ndim != 3 or pixels.shape[2] != 3:
+        return True
+
+    height, width = pixels.shape[:2]
+    # One comparison over the channel axis replaces three per-channel masks.
+    in_band = (pixels >= GRAY_LOWER) & (pixels <= GRAY_UPPER)
+    gray_mask = in_band.all(axis=2)
+    return gray_mask.sum() / (height * width) > GRAY_RATIO_THRESHOLD
+
+# ================== RKNN 工具 ==================
+def letterbox_resize(image, size, bg_color=114):
+    """Fit ``image`` inside ``size`` (width, height) and pad the remainder.
+
+    Returns ``(canvas, scale, dx, dy)``: the padded image, the resize factor,
+    and the left/top offsets of the resized image inside the canvas.
+    """
+    target_w, target_h = size
+    src_h, src_w = image.shape[:2]
+
+    # Use the tighter ratio so neither side exceeds the target.
+    scale = min(target_w / src_w, target_h / src_h)
+    new_w = int(src_w * scale)
+    new_h = int(src_h * scale)
+    resized = cv2.resize(image, (new_w, new_h))
+
+    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
+    dx = (target_w - new_w) // 2
+    dy = (target_h - new_h) // 2
+    canvas[dy:dy + new_h, dx:dx + new_w] = resized
+    return canvas, scale, dx, dy
+
+# ================== DFL ==================
+def dfl_numpy(position):
+    """Reduce a DFL tensor to the expected bin index per box side.
+
+    Args:
+        position: Array of shape ``(n, c, h, w)`` where ``c`` is split into
+            4 groups of ``c // 4`` bins.
+
+    Returns:
+        Array of shape ``(n, 4, h, w)`` holding, for each group, the
+        softmax-weighted mean bin index.
+    """
+    n, c, h, w = position.shape
+    mc = c // 4
+    y = position.reshape(n, 4, mc, h, w)
+    # Subtract the per-group maximum before exponentiating: large logits
+    # would otherwise overflow to inf and turn the softmax into NaN.
+    y = np.exp(y - np.max(y, axis=2, keepdims=True))
+    y = y / np.sum(y, axis=2, keepdims=True)
+    acc = np.arange(mc).reshape(1,1,mc,1,1)
+    return np.sum(y * acc, axis=2)
+
+def box_process(position):
+    """Convert a DFL tensor into corner boxes in network-input pixels.
+
+    Args:
+        position: Array of shape ``(n, c, grid_h, grid_w)`` accepted by
+            ``dfl_numpy``.
+
+    Returns:
+        Array of shape ``(n, 4, grid_h, grid_w)``: the first two channels are
+        the cell centre minus the first two decoded distances, the last two
+        are the cell centre plus the last two, all scaled by the stride.
+    """
+    grid_h, grid_w = position.shape[2:4]
+    col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
+    col = col.reshape(1,1,grid_h,grid_w)
+    row = row.reshape(1,1,grid_h,grid_w)
+    # Channel 0 is the column (x) index, channel 1 the row (y) index.
+    grid = np.concatenate((col,row), axis=1)
+    # The stride must follow the same (x, y) order as the grid. IMG_SIZE is
+    # (width, height), so x uses width / grid_w and y uses height / grid_h.
+    stride = np.array([
+        IMG_SIZE[0]//grid_w,
+        IMG_SIZE[1]//grid_h
+    ]).reshape(1,2,1,1)
+
+    position = dfl_numpy(position)
+    box_xy1 = grid + 0.5 - position[:,0:2,:,:]
+    box_xy2 = grid + 0.5 + position[:,2:4,:,:]
+    return np.concatenate((box_xy1*stride, box_xy2*stride), axis=1)
+
+# ================== 核心修改：只用 cls 置信度 ==================
+def has_bag_from_outputs(outputs):
+    """Report whether any class-score map reaches ``OBJ_THRESH``.
+
+    Only ``outputs[1]``, ``outputs[4]`` and ``outputs[7]`` (index 0 of each)
+    are inspected; no boxes are decoded.
+    """
+    return any(
+        outputs[branch * 3 + 1][0].max() >= OBJ_THRESH
+        for branch in range(3)
+    )
+
+# ================== RKNN 初始化 ==================
+rknn = RKNNLite()
+
+# Both calls report success with a zero return code; stop on the first failure.
+load_status = rknn.load_rknn(RKNN_MODEL)
+if load_status != 0:
+    raise RuntimeError("❌ RKNN 模型加载失败")
+
+runtime_status = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO)
+if runtime_status != 0:
+    raise RuntimeError("❌ RKNN Runtime 初始化失败")
+
+print("✅ RKNN 初始化完成")
+
+# ================== 视频流处理 ==================
+# Capture loop: connect to the RTSP stream, sample every SAVE_INTERVAL-th
+# frame, drop near-duplicates via SSIM, and save frames whose class-score
+# maps reach the threshold. The outer loop reconnects when the stream drops.
+last_gray = None
+frame_count = 0
+
+try:
+    while True:
+        cap = cv2.VideoCapture(RTSP_URL)
+        if not cap.isOpened():
+            print("❌ 无法连接 RTSP")
+            # Release the failed handle before retrying so handles do not pile up.
+            cap.release()
+            time.sleep(1)
+            continue
+
+        print("✅ 开始读取视频流")
+
+        try:
+            while True:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+
+                frame_count += 1
+
+                if SHOW_WINDOW:
+                    cv2.imshow("Camera", frame)
+                    if cv2.waitKey(1) == ord('q'):
+                        raise KeyboardInterrupt
+
+                if frame_count % SAVE_INTERVAL != 0:
+                    continue
+
+                print(f"处理帧 {frame_count}")
+
+                # STEP1 gray filter (disabled)
+                #if is_large_gray(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))):
+                    #print("跳过：大面积灰色")
+                    #continue
+
+                # STEP2 SSIM de-duplication against the last processed frame
+                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+                if last_gray is not None:
+                    sim = ssim(gray, last_gray)
+                    if sim > SSIM_THRESHOLD:
+                        print(f"跳过：SSIM={sim:.3f}")
+                        continue
+                last_gray = gray.copy()
+
+                # STEP3 RKNN inference (class scores only)
+                img_r, scale, dx, dy = letterbox_resize(frame, IMG_SIZE)
+                outputs = rknn.inference([np.expand_dims(img_r, 0)])
+
+                if not has_bag_from_outputs(outputs):
+                    print("跳过：未检测到 bag")
+                    continue
+
+                # STEP4 disk check: stop when less than 5 GiB is free
+                _, _, free = shutil.disk_usage(OUTPUT_DIR)
+                if free < 5 * 1024**3:
+                    raise SystemExit("❌ 磁盘空间不足")
+
+                # STEP5 save, with a millisecond suffix to keep names unique
+                ts = time.strftime("%Y%m%d_%H%M%S")
+                ms = int((time.time() % 1) * 1000)
+                path = os.path.join(OUTPUT_DIR, f"bag_{ts}_{ms:03d}.png")
+                cv2.imwrite(path, frame)
+                print(f"✅ 已保存: {path}")
+        finally:
+            cap.release()
+            cv2.destroyAllWindows()
+
+        # The read loop only falls through to here when a frame could not be
+        # read; pause so a dead stream is not re-dialled in a tight loop.
+        time.sleep(1)
+
+except KeyboardInterrupt:
+    # Covers Ctrl-C anywhere in the loop as well as the 'q' key above.
+    print("🛑 用户中断")
+finally:
+    # Runs on every exit path, including the SystemExit raised on low disk.
+    rknn.release()
+
+print("程序结束")
+
--- a/ailai_pc/image_capture_detect/image_02_3588.py
+++ b/ailai_pc/image_capture_detect/image_02_3588.py
@ -0,0 +1,209 @@
+import cv2
+import time
+import os
+import numpy as np
+from PIL import Image
+from skimage.metrics import structural_similarity as ssim
+from rknnlite.api import RKNNLite
+
+# ================== Configuration ==================
+# NOTE(review): the default URL embeds the camera credentials in source
+# control. Set the RTSP_URL environment variable to override it without
+# editing this file; the default is kept so existing deployments still run.
+RTSP_URL = os.environ.get(
+    "RTSP_URL",
+    "rtsp://admin:XJ123456@192.168.250.60:554/streaming/channels/101",
+)
+RKNN_MODEL = "bag3588.rknn"
+OUTPUT_DIR = "camera_event_capture"
+
+CONF_THRESHOLD = 0.25          # bag class-probability threshold
+SSIM_THRESHOLD = 0.9           # idle frames more similar than this are skipped
+
+END_MISS_FRAMES = 30           # consecutive frames without a bag that end a session
+SAVE_EVERY_N_FRAMES = 1
+SHOW_WINDOW = False
+
+IMG_SIZE = (640, 640)          # passed to letterbox_resize as (width, height)
+CLASS_NAME = ["bag"]
+
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# =====================================================
+# 灰度判断：≥90% 像素为灰色（R≈G≈B）
+# =====================================================
+def is_large_gray(image, gray_ratio_thresh=0.9):
+    """Return whether at least ``gray_ratio_thresh`` of the pixels are near-neutral.
+
+    A pixel's grayness is ``1 - (max channel - min channel) / 255``; pixels
+    scoring 0.9 or more are counted. Anything that is not an H x W x 3 array
+    is reported as True.
+    """
+    pixels = np.array(image).astype(np.float32)
+    if pixels.ndim != 3 or pixels.shape[2] != 3:
+        return True
+
+    # Channel spread is 0 for a perfectly neutral pixel; channel order is irrelevant.
+    spread = pixels.max(axis=2) - pixels.min(axis=2)
+    grayness = 1.0 - spread / 255.0
+
+    gray_count = np.sum(grayness >= 0.9)
+    pixel_count = pixels.shape[0] * pixels.shape[1]
+
+    return (gray_count / pixel_count) >= gray_ratio_thresh
+
+
+# =====================================================
+# RKNN 推理工具
+# =====================================================
+def letterbox_resize(image, size, bg_color=114):
+    """Scale ``image`` to fit ``size`` (width, height) and centre it on a padded canvas.
+
+    Returns ``(canvas, scale, dx, dy)`` where ``scale`` is the resize factor
+    and ``dx``/``dy`` are the left/top padding offsets.
+    """
+    target_w, target_h = size
+    src_h, src_w = image.shape[:2]
+
+    # The tighter ratio keeps the whole image inside the target.
+    scale = min(target_w / src_w, target_h / src_h)
+    new_w = int(src_w * scale)
+    new_h = int(src_h * scale)
+    resized = cv2.resize(image, (new_w, new_h))
+
+    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
+    dx = (target_w - new_w) // 2
+    dy = (target_h - new_h) // 2
+    canvas[dy:dy + new_h, dx:dx + new_w] = resized
+    return canvas, scale, dx, dy
+
+
+def dfl_numpy(position):
+    """Reduce a DFL tensor to the expected bin index per box side.
+
+    Args:
+        position: Array of shape ``(n, c, h, w)`` where ``c`` is split into
+            4 groups of ``c // 4`` bins.
+
+    Returns:
+        Array of shape ``(n, 4, h, w)`` holding, for each group, the
+        softmax-weighted mean bin index.
+    """
+    n, c, h, w = position.shape
+    p_num = 4
+    mc = c // p_num
+    y = position.reshape(n, p_num, mc, h, w)
+    # Subtract the per-group maximum before exponentiating: large logits
+    # would otherwise overflow to inf and turn the softmax into NaN.
+    y = np.exp(y - np.max(y, axis=2, keepdims=True))
+    y = y / np.sum(y, axis=2, keepdims=True)
+    acc = np.arange(mc).reshape(1, 1, mc, 1, 1)
+    return np.sum(y * acc, axis=2)
+
+
+def box_process(position):
+    """Convert a DFL tensor into corner boxes in network-input pixels.
+
+    Args:
+        position: Array of shape ``(n, c, grid_h, grid_w)`` accepted by
+            ``dfl_numpy``.
+
+    Returns:
+        Array of shape ``(n, 4, grid_h, grid_w)``: the first two channels are
+        the cell centre minus the first two decoded distances, the last two
+        are the cell centre plus the last two, all scaled by the stride.
+    """
+    grid_h, grid_w = position.shape[2:4]
+    col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
+    col = col.reshape(1, 1, grid_h, grid_w)
+    row = row.reshape(1, 1, grid_h, grid_w)
+    # Channel 0 is the column (x) index, channel 1 the row (y) index.
+    grid = np.concatenate((col, row), axis=1)
+    # IMG_SIZE is (width, height): pair width with grid_w for x and height
+    # with grid_h for y so the stride stays correct for non-square inputs.
+    stride = np.array([IMG_SIZE[0] // grid_w, IMG_SIZE[1] // grid_h]).reshape(1, 2, 1, 1)
+
+    position = dfl_numpy(position)
+    box_xy1 = grid + 0.5 - position[:, 0:2, :, :]
+    box_xy2 = grid + 0.5 + position[:, 2:4, :, :]
+    return np.concatenate((box_xy1 * stride, box_xy2 * stride), axis=1)
+
+
+# =====================================================
+# ✅ 关键修改：只用 class prob 作为置信度
+# =====================================================
+def filter_boxes(box_class_probs):
+    """Report whether any row's first column reaches ``CONF_THRESHOLD``.
+
+    The first column is treated as the bag class probability and is used
+    directly as the confidence; no objectness term is involved.
+    """
+    probs = np.array(box_class_probs)
+    bag_scores = probs[:, 0]   # only one class: bag
+    return (bag_scores >= CONF_THRESHOLD).any()
+
+
+def post_process(outputs):
+    """Return whether any cell's bag probability reaches the threshold.
+
+    Only the class tensors ``outputs[1]``, ``outputs[4]`` and ``outputs[7]``
+    are read. Each is rearranged from ``(n, c, h, w)`` to one row per cell
+    and the rows of all three are passed to ``filter_boxes``.
+    """
+    # The result depends on class scores only, so no boxes are decoded here.
+    class_list = [outputs[i * 3 + 1] for i in range(3)]
+
+    def flatten(x):
+        # (n, c, h, w) -> (n * h * w, c): one row of class scores per cell.
+        x = x.transpose(0, 2, 3, 1)
+        return x.reshape(-1, x.shape[3])
+
+    class_probs = np.concatenate([flatten(c) for c in class_list])
+    return filter_boxes(class_probs)
+
+
+# =====================================================
+# RKNN 初始化
+# =====================================================
+rknn = RKNNLite()
+# Explicit checks instead of ``assert``: under ``python -O`` assert statements
+# are removed, which would silently drop the load/init calls themselves.
+if rknn.load_rknn(RKNN_MODEL) != 0:
+    raise RuntimeError("RKNN 模型加载失败")
+if rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO) != 0:
+    raise RuntimeError("RKNN Runtime 初始化失败")
+print("✅ RKNN 初始化完成")
+
+
+# =====================================================
+# 视频流 & 状态机
+# =====================================================
+# Event capture: stay idle until a bag is detected, then save every
+# SAVE_EVERY_N_FRAMES-th frame into a per-session directory until
+# END_MISS_FRAMES consecutive frames contain no bag.
+cap = cv2.VideoCapture(RTSP_URL)
+if not cap.isOpened():
+    # Raise instead of assert so the check survives ``python -O``; release
+    # what has been acquired so far before bailing out.
+    cap.release()
+    rknn.release()
+    raise RuntimeError("RTSP 连接失败")
+print("🎥 视频流已连接")
+
+STATE_IDLE = 0
+STATE_CAPTURING = 1
+
+# Consecutive failed reads (0.2 s apart) tolerated before reopening the stream.
+MAX_READ_FAILURES = 25
+
+state = STATE_IDLE
+miss_count = 0
+save_idx = 0
+session_dir = None
+session_id = 0
+last_gray = None
+frame_count = 0
+read_failures = 0
+
+
+try:
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            read_failures += 1
+            if read_failures >= MAX_READ_FAILURES:
+                # Retrying the same handle forever would stall the program
+                # if the stream has dropped, so reopen it.
+                cap.release()
+                cap = cv2.VideoCapture(RTSP_URL)
+                read_failures = 0
+            time.sleep(0.2)
+            continue
+
+        read_failures = 0
+        frame_count += 1
+
+        if SHOW_WINDOW:
+            cv2.imshow("Camera", frame)
+            if cv2.waitKey(1) == ord('q'):
+                break
+
+        # ---------- gray filter ----------
+        pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+        if is_large_gray(pil_img):
+            continue
+
+        # ---------- SSIM ----------
+        # Near-duplicate frames are only skipped while idle; during a capture
+        # session every frame goes through detection.
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        if last_gray is not None and state == STATE_IDLE:
+            if ssim(gray, last_gray) > SSIM_THRESHOLD:
+                continue
+        last_gray = gray.copy()
+
+        # ---------- RKNN inference ----------
+        img_resized, _, _, _ = letterbox_resize(frame, IMG_SIZE)
+        outputs = rknn.inference(inputs=[np.expand_dims(img_resized, 0)])
+        has_bag = post_process(outputs)
+
+        # ---------- state machine ----------
+        if state == STATE_IDLE:
+            if has_bag:
+                session_id += 1
+                ts = time.strftime("%Y%m%d_%H%M%S")
+                session_dir = os.path.join(OUTPUT_DIR, f"session_{session_id:04d}_{ts}")
+                os.makedirs(session_dir, exist_ok=True)
+                print("🚀 进入采集")
+                state = STATE_CAPTURING
+                miss_count = 0
+                save_idx = 0
+
+        else:  # STATE_CAPTURING
+            if has_bag:
+                miss_count = 0
+            else:
+                miss_count += 1
+
+            if save_idx % SAVE_EVERY_N_FRAMES == 0:
+                fname = f"{save_idx:06d}.png"
+                cv2.imwrite(os.path.join(session_dir, fname), frame)
+            save_idx += 1
+
+            if miss_count >= END_MISS_FRAMES:
+                print(f"🛑 退出采集，保存 {save_idx} 帧")
+                state = STATE_IDLE
+                miss_count = 0
+                session_dir = None
+
+except KeyboardInterrupt:
+    print("\n🛑 用户退出")
+
+finally:
+    cap.release()
+    cv2.destroyAllWindows()
+    rknn.release()
+    print("程序结束")
+