# Helper that persists the selected ROI coordinates to a .txt file
def save_rois_to_txt(rois, filepath):
    """Write each ROI to ``filepath`` as one comma-separated line.

    Args:
        rois: Iterable of ROI records; every element of a record is
            converted with ``str`` and the elements are joined by commas.
        filepath: Destination path. The file is opened in write mode, so any
            existing content is replaced.
    """
    with open(filepath, 'w') as out:
        # The generator is consumed inside the ``with`` so the file is opened
        # (and truncated) before the first ROI is formatted.
        out.writelines(
            ','.join(str(value) for value in roi) + '\n'
            for roi in rois
        )
    print(f"💾 ROI 坐标已保存至: {filepath}")
# ====================== Main program ======================
if __name__ == "__main__":
    # Moves every image found directly inside IMAGE_SOURCE_DIR into one of
    # three bucket directories chosen from the detector's box confidence:
    #   OUTPUT_DIR_0 -> no detection (treated as conf == 0)
    #   OUTPUT_DIR_1 -> 0 < conf < 0.5
    #   OUTPUT_DIR_2 -> conf >= 0.5
    print("🚀 bbox 置信度分桶(移动原图,含无目标图像)")

    model = YOLO(MODEL_PATH)

    image_files = [
        f for f in os.listdir(IMAGE_SOURCE_DIR)
        if os.path.splitext(f.lower())[1] in IMG_EXTENSIONS
    ]

    print(f"📸 找到图片 {len(image_files)} 张")

    for img_name in image_files:
        src_path = os.path.join(IMAGE_SOURCE_DIR, img_name)

        img = cv2.imread(src_path)
        if img is None:
            # Report the skip so unreadable files left behind in the source
            # directory are not a silent surprise.
            print(f"⚠️ 无法读取图像,已跳过: {img_name}")
            continue

        results = model(img, verbose=False)

        # ====================== Confidence selection ======================
        boxes = results[0].boxes if results else None
        if boxes is None or len(boxes.conf) == 0:
            # No detection box at all -> treat as conf = 0
            bbox_conf = 0.0
        else:
            # Use the highest confidence explicitly instead of relying on the
            # first box happening to be the most confident one.
            bbox_conf = float(boxes.conf.max().cpu().item())

        # ====================== Bucketing ======================
        if bbox_conf == 0:
            dst_dir = OUTPUT_DIR_0
        elif bbox_conf < 0.5:
            dst_dir = OUTPUT_DIR_1
        else:
            dst_dir = OUTPUT_DIR_2

        dst_path = os.path.join(dst_dir, img_name)

        # ====================== Move the file ======================
        shutil.move(src_path, dst_path)

        print(f"{img_name} -> conf={bbox_conf:.3f} -> {os.path.basename(dst_dir)}")

    print("✅ 完成(含无目标图片)")
def dfl_numpy(position):
    """Turn per-side bin logits into an expected bin index.

    The channel axis of ``position`` is split into 4 groups of ``c // 4``
    bins. A softmax is taken over each group and the result is the
    probability-weighted sum of the bin indices ``0 .. c // 4 - 1``.
    (The name suggests Distribution-Focal-Loss decoding.)

    Args:
        position: Array of shape ``(n, c, h, w)`` with ``c`` divisible by 4.

    Returns:
        Array of shape ``(n, 4, h, w)`` holding the expected bin index for
        each of the 4 groups at every grid cell.
    """
    n, c, h, w = position.shape
    p_num = 4
    mc = c // p_num
    logits = position.reshape(n, p_num, mc, h, w)
    # Shifting by the per-distribution maximum leaves the softmax unchanged
    # but keeps np.exp from overflowing to inf (which would produce nan).
    logits = logits - np.max(logits, axis=2, keepdims=True)
    weights = np.exp(logits)
    weights = weights / np.sum(weights, axis=2, keepdims=True)
    acc = np.arange(mc).reshape(1, 1, mc, 1, 1)
    return np.sum(weights * acc, axis=2)
def filter_boxes(boxes, box_confidences, box_class_probs,
                 obj_thresh=None, nms_thresh=None):
    """Score-threshold the candidate boxes, then apply greedy NMS.

    Each candidate's score is ``box_confidence * best_class_probability``.
    Candidates scoring below the object threshold are dropped; the rest go
    through a single class-agnostic non-maximum suppression pass.

    Args:
        boxes: Candidate boxes, reshaped to ``(N, 4)`` as ``x1, y1, x2, y2``.
        box_confidences: Per-candidate confidence, flattened to ``(N,)``.
        box_class_probs: Per-candidate class probabilities, ``(N, classes)``.
        obj_thresh: Minimum score to keep a candidate. Defaults to the
            module-level ``OBJ_THRESH``.
        nms_thresh: Overlap above which a lower-scored box is suppressed.
            Defaults to the module-level ``NMS_THRESH``.

    Returns:
        ``(boxes, classes, scores, confidences)`` for the surviving boxes in
        descending score order, or ``(None, None, None, None)`` when no
        candidate reaches ``obj_thresh``.
    """
    # Resolve defaults at call time so existing three-argument callers keep
    # using the module configuration.
    obj_thresh = OBJ_THRESH if obj_thresh is None else obj_thresh
    nms_thresh = NMS_THRESH if nms_thresh is None else nms_thresh

    boxes = np.array(boxes).reshape(-1, 4)
    box_confidences = np.array(box_confidences).reshape(-1)
    box_class_probs = np.array(box_class_probs)

    class_ids = np.argmax(box_class_probs, axis=-1)
    class_scores = box_class_probs[np.arange(len(class_ids)), class_ids]
    scores = box_confidences * class_scores

    mask = scores >= obj_thresh
    if not mask.any():
        return None, None, None, None

    boxes = boxes[mask]
    classes = class_ids[mask]
    scores = scores[mask]
    conf_keep = box_confidences[mask]  # raw confidence, before class weighting

    # Greedy NMS: repeatedly keep the best remaining box and discard every
    # other box whose overlap with it exceeds nms_thresh.
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        best = order[0]
        rest = order[1:]
        keep.append(best)
        inter_w = np.maximum(0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest]) + 1)
        inter_h = np.maximum(0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest]) + 1)
        inter = inter_w * inter_h
        overlap = inter / (areas[best] + areas[rest] - inter)
        order = rest[overlap <= nms_thresh]
    return boxes[keep], classes[keep], scores[keep], conf_keep[keep]
def _unpack_detect_outputs(outputs, return_conf, return_vis):
    """Map detect_bag's variable-length result onto (conf, min_x, vis_img)."""
    # Local import: this file does not import numpy, so the scalar check uses
    # the stdlib numeric tower instead of np.number.
    import numbers

    conf = None
    min_x = None
    vis_img = None

    if return_conf and return_vis:
        if len(outputs) == 3:
            conf, min_x, vis_img = outputs
        elif len(outputs) == 2:
            conf, min_x = outputs
        elif len(outputs) == 1:
            min_x = outputs[0]
    elif return_conf:
        if len(outputs) >= 2:
            conf, min_x = outputs[:2]
        elif len(outputs) == 1:
            min_x = outputs[0]
    elif return_vis:
        if len(outputs) == 2:
            min_x, vis_img = outputs
        elif len(outputs) == 1:
            min_x = outputs[0]
    else:
        # Accept either a bare number or a sequence whose first item is min_x.
        min_x = outputs if isinstance(outputs, numbers.Real) else outputs[0]

    return conf, min_x, vis_img


def bag_judgment(img, return_conf=True, return_vis=False):
    """
    Judge the bag state in an image, optionally returning confidence and a
    visualisation image.

    Args:
        img (np.ndarray): Image to inspect.
        return_conf (bool): Whether to request the confidence value.
        return_vis (bool): Whether to request the visualisation image.
    Returns:
        status_bool: True = in position, False = not in position,
            None = nothing detected
        status_text: Status text (Chinese)
        conf: Confidence value reported by detect_bag, or None
        min_x: Left-most x coordinate, or None
        vis_img: Visualisation image, or None
    """
    outputs = detect_bag(img, return_conf=return_conf, return_vis=return_vis)
    conf, min_x, vis_img = _unpack_detect_outputs(outputs, return_conf, return_vis)

    # Decide the state from the left-most x coordinate.
    if min_x is None:
        status_bool = None
        status_text = "没有料袋"
    elif min_x >= THRESHOLD_X:
        status_bool = True
        status_text = "料袋到位"
    else:
        status_bool = False
        status_text = "料袋未到位"

    return status_bool, status_text, conf, min_x, vis_img
# ================== Gray-area check ==================
def is_large_gray(image):
    """Report whether most of the image lies inside the configured gray band.

    A pixel counts when all three of its channels are within
    ``[GRAY_LOWER, GRAY_UPPER]``. The function returns True when the fraction
    of such pixels exceeds ``GRAY_RATIO_THRESHOLD``. Inputs that do not
    convert to an ``(h, w, 3)`` array also return True.
    """
    pixels = np.array(image)
    if pixels.ndim != 3 or pixels.shape[2] != 3:
        return True

    height, width = pixels.shape[:2]
    # Test all channels at once, then require every channel of a pixel to pass.
    in_band = (pixels >= GRAY_LOWER) & (pixels <= GRAY_UPPER)
    gray_mask = in_band.all(axis=2)
    return gray_mask.sum() / (height * width) > GRAY_RATIO_THRESHOLD
# ================== Video stream processing ==================
# Connect to the RTSP stream, sample every SAVE_INTERVAL-th frame, drop frames
# that are mostly gray or near-duplicates of the last accepted frame, and save
# the original frame whenever the model reports a bag. A failed read ends the
# inner loop and the outer loop reconnects.
max_retry_seconds = 10
retry_interval_seconds = 1

last_gray = None
frame_count = 0

try:
    while True:
        cap = cv2.VideoCapture(RTSP_URL)
        start_time = time.time()

        while not cap.isOpened():
            if time.time() - start_time >= max_retry_seconds:
                print("❌ 无法连接 RTSP")
                cap.release()
                # SystemExit rather than the interactive-only exit() helper;
                # this matches the disk-space exit further down.
                raise SystemExit(1)
            time.sleep(retry_interval_seconds)
            # Drop the failed handle before opening a new one.
            cap.release()
            cap = cv2.VideoCapture(RTSP_URL)

        print("✅ 开始读取视频流")

        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    print("❌ 读取失败")
                    break

                frame_count += 1

                if SHOW_WINDOW:
                    cv2.imshow("Camera", frame)
                    if cv2.waitKey(1) == ord('q'):
                        raise KeyboardInterrupt

                if frame_count % SAVE_INTERVAL != 0:
                    continue

                print(f"处理帧 {frame_count}")

                # STEP1: gray filter
                pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                if is_large_gray(pil_image):
                    print("跳过:大面积灰色")
                    continue

                # STEP2: SSIM de-duplication against the last accepted frame
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                if last_gray is not None:
                    sim = ssim(gray, last_gray)
                    if sim > SSIM_THRESHOLD:
                        print(f"跳过:SSIM={sim:.3f}")
                        continue
                last_gray = gray.copy()

                # STEP3: RKNN inference, only to decide whether a bag is present
                img_resized, scale, dx, dy = letterbox_resize(frame, IMG_SIZE)
                input_data = np.expand_dims(img_resized, 0)
                outputs = rknn.inference(inputs=[input_data])
                has_bag = post_process(outputs, scale, dx, dy)
                if not has_bag:
                    print("跳过:未检测到 bag")
                    continue

                # STEP4: stop when less than 5 GiB of disk space is free
                _, _, free = shutil.disk_usage(OUTPUT_DIR)
                if free < 5*1024**3:
                    print("❌ 磁盘空间不足")
                    raise SystemExit(1)

                # STEP5: save the original frame
                ts = time.strftime("%Y%m%d_%H%M%S")
                ms = int((time.time() % 1) * 1000)
                filename = f"bag_{ts}_{ms:03d}.png"
                path = os.path.join(OUTPUT_DIR, filename)
                cv2.imwrite(path, frame)
                print(f"✅ 已保存: {path}")

        except KeyboardInterrupt:
            print("\n🛑 用户中断")
            break

        finally:
            cap.release()
            cv2.destroyAllWindows()
            print(f"视频流关闭,共处理 {frame_count} 帧")

finally:
    # Runs on every exit path, including SystemExit, so the runtime is
    # always released.
    rknn.release()
    print("程序结束")
def dfl_numpy(position):
    """Collapse per-side bin logits into an expected bin index.

    The ``c`` channels of ``position`` are viewed as 4 groups of ``c // 4``
    bins. Each group is passed through a softmax and reduced to the
    probability-weighted mean of its bin indices.

    Args:
        position: Array of shape ``(n, c, h, w)`` with ``c`` divisible by 4.

    Returns:
        Array of shape ``(n, 4, h, w)``.
    """
    n, c, h, w = position.shape
    p_num = 4
    mc = c // p_num
    y = position.reshape(n, p_num, mc, h, w)
    # Softmax is shift-invariant, so subtracting the group maximum changes
    # nothing mathematically while preventing exp() overflow (inf / inf = nan).
    y = np.exp(y - np.max(y, axis=2, keepdims=True))
    y = y / np.sum(y, axis=2, keepdims=True)
    acc = np.arange(mc).reshape(1, 1, mc, 1, 1)
    return np.sum(y * acc, axis=2)
# ================== State machine ==================
# IDLE: wait until a bag is detected, then open a new session directory.
# CAPTURING: save frames until END_MISS_FRAMES consecutive processed frames
# contain no bag, then return to IDLE.
STATE_IDLE = 0
STATE_CAPTURING = 1

# Number of consecutive failed reads (0.5 s apart) before the capture is
# reopened. Without this, a dropped stream is retried on the same capture
# object forever.
READ_FAILURES_BEFORE_REOPEN = 10

state = STATE_IDLE
miss_count = 0
save_idx = 0            # number of frames written in the current session
session_frames = 0      # number of frames seen in the current session
session_dir = None
session_id = 0
last_gray = None
frame_count = 0
read_failures = 0

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            read_failures += 1
            if read_failures >= READ_FAILURES_BEFORE_REOPEN:
                print("⚠️ 视频流读取失败,正在重新连接")
                cap.release()
                cap = cv2.VideoCapture(RTSP_URL)
                read_failures = 0
            time.sleep(0.5)
            continue
        read_failures = 0

        frame_count += 1

        if SHOW_WINDOW:
            cv2.imshow("Camera", frame)
            if cv2.waitKey(1) == ord('q'):
                break

        # ---------- Gray filter ----------
        pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if is_large_gray(pil_image):
            continue

        # ---------- SSIM de-duplication (only while idle) ----------
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        if last_gray is not None and state == STATE_IDLE:
            sim = ssim(gray, last_gray)
            if sim > SSIM_THRESHOLD:
                continue
        last_gray = gray.copy()

        # ---------- RKNN inference: is there a bag? ----------
        img_resized, scale, dx, dy = letterbox_resize(frame, IMG_SIZE)
        input_data = np.expand_dims(img_resized, 0)
        outputs = rknn.inference(inputs=[input_data])
        has_bag = post_process(outputs, scale, dx, dy)

        # ---------- State machine ----------
        if state == STATE_IDLE:
            if has_bag:
                session_id += 1
                ts = time.strftime("%Y%m%d_%H%M%S")
                session_dir = os.path.join(OUTPUT_DIR, f"session_{session_id:04d}_{ts}")
                os.makedirs(session_dir, exist_ok=True)
                print("\n🚀 进入采集")
                state = STATE_CAPTURING
                miss_count = 0
                save_idx = 0
                session_frames = 0

        elif state == STATE_CAPTURING:
            if has_bag:
                miss_count = 0
            else:
                miss_count += 1

            # Gate on frames seen and number files by frames written, so
            # SAVE_EVERY_N_FRAMES > 1 keeps sampling and the exit message
            # reports the real number of saved frames.
            if session_frames % SAVE_EVERY_N_FRAMES == 0:
                ts = time.strftime("%Y%m%d_%H%M%S")
                ms = int((time.time() % 1) * 1000)
                fname = f"{save_idx:06d}_{ts}_{ms:03d}.png"
                cv2.imwrite(os.path.join(session_dir, fname), frame)  # save the original frame
                save_idx += 1
            session_frames += 1

            if miss_count >= END_MISS_FRAMES:
                print(f"🛑 退出采集,本次保存 {save_idx} 帧")
                state = STATE_IDLE
                miss_count = 0
                session_dir = None

except KeyboardInterrupt:
    print("\n🛑 用户退出")

finally:
    cap.release()
    cv2.destroyAllWindows()
    rknn.release()
    print("程序结束")
"./result" +os.makedirs(OUTPUT_DIR, exist_ok=True) + +# ====================== 全局 RKNN ====================== +_global_rknn = None + +def init_rknn(model_path): + global _global_rknn + if _global_rknn is None: + rknn = RKNNLite(verbose=False) + rknn.load_rknn(model_path) + rknn.init_runtime() + _global_rknn = rknn + return _global_rknn + +# ====================== 工具函数 ====================== +def letterbox_resize(image, size, bg_color=114): + target_w, target_h = size + h, w = image.shape[:2] + scale = min(target_w / w, target_h / h) + new_w, new_h = int(w * scale), int(h * scale) + resized = cv2.resize(image, (new_w, new_h)) + canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8) + dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2 + canvas[dy:dy + new_h, dx:dx + new_w] = resized + return canvas, scale, dx, dy + +def dfl_numpy(position): + n, c, h, w = position.shape + p_num = 4 + mc = c // p_num + y = position.reshape(n, p_num, mc, h, w) + y = np.exp(y) / np.sum(np.exp(y), axis=2, keepdims=True) + acc = np.arange(mc).reshape(1,1,mc,1,1) + y = np.sum(y * acc, axis=2) + return y + +def box_process(position): + grid_h, grid_w = position.shape[2:4] + col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h)) + col = col.reshape(1,1,grid_h,grid_w) + row = row.reshape(1,1,grid_h,grid_w) + grid = np.concatenate((col,row), axis=1) + stride = np.array([IMG_SIZE[1] // grid_h, IMG_SIZE[0] // grid_w]).reshape(1,2,1,1) + position = dfl_numpy(position) + box_xy = grid + 0.5 - position[:,0:2,:,:] + box_xy2 = grid + 0.5 + position[:,2:4,:,:] + xyxy = np.concatenate((box_xy*stride, box_xy2*stride), axis=1) + return xyxy + +def filter_boxes(boxes, box_confidences, box_class_probs): + boxes = np.array(boxes).reshape(-1, 4) + box_confidences = np.array(box_confidences).reshape(-1) + box_class_probs = np.array(box_class_probs) + + class_ids = np.argmax(box_class_probs, axis=-1) + class_scores = box_class_probs[np.arange(len(class_ids)), class_ids] + scores = 
# ====================== detect_bag ======================
def detect_bag(img, return_conf=True, return_vis=False):
    """Run the bag detector on one image and report the left-most box edge.

    Args:
        img: Image array; it is letterboxed to ``IMG_SIZE`` before inference.
        return_conf: When True the result starts with the largest value of
            the ``scores`` array returned by ``post_process``.
        return_vis: When True an annotated copy of ``img`` is drawn and
            written to ``OUTPUT_DIR`` as ``vis_result.jpg``.

    Returns:
        * nothing detected: ``(None, None)`` if ``return_conf`` else ``(None,)``
        * ``return_conf`` True: ``(conf, min_x)``; the annotated image is not
          part of this tuple even when ``return_vis`` is True
        * ``return_conf`` False: ``(min_x, vis_img)``, where ``vis_img`` is
          None unless ``return_vis`` is True
    """
    runtime = init_rknn(MODEL_PATH)

    padded, scale, dx, dy = letterbox_resize(img, IMG_SIZE)
    outputs = runtime.inference(inputs=[np.expand_dims(padded, 0)])
    boxes, classes, scores, conf_keep = post_process(outputs, scale, dx, dy)

    if boxes is None or len(boxes) == 0:
        if return_conf:
            return (None, None)
        return (None,)

    min_x = float(boxes[:, 0].min())
    conf_val = float(scores.max()) if return_conf else None

    vis_img = None
    if return_vis:
        vis_img = img.copy()
        green = (0, 255, 0)
        for box, cls_id, score in zip(boxes, classes, scores):
            x1, y1, x2, y2 = box.astype(int)
            cv2.rectangle(vis_img, (x1, y1), (x2, y2), green, 2)
            # Label goes just above the box, clamped to the top image edge.
            cv2.putText(vis_img,
                        f"{CLASS_NAME[cls_id]}:{score:.1f}",
                        (x1, max(y1 - 5, 0)),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.6,
                        green,
                        2)
        cv2.imwrite(os.path.join(OUTPUT_DIR, "vis_" + "result.jpg"), vis_img)

    if return_conf:
        return conf_val, min_x
    return min_x, vis_img
# ================== RKNN helper functions ==================
def letterbox_resize(image, size, bg_color=114):
    """Resize ``image`` to fit inside ``size`` and pad it to exactly ``size``.

    The aspect ratio is preserved. The resized image is centred on a canvas
    filled with ``bg_color``.

    Args:
        image: Source image array; only its first two dimensions
            (height, width) are read here.
        size: Target ``(width, height)``.
        bg_color: Fill value for the padding.

    Returns:
        ``(canvas, scale, dx, dy)``: the padded ``uint8`` image of shape
        ``(target_h, target_w, 3)``, the scale factor applied, and the
        left/top offsets of the resized image inside the canvas.
    """
    target_w, target_h = size
    h, w = image.shape[:2]
    scale = min(target_w / w, target_h / h)
    # Truncation can give 0 for very elongated inputs; keep at least one
    # pixel per side so cv2.resize is never asked for an empty target.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    resized = cv2.resize(image, (new_w, new_h))
    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
    dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2
    canvas[dy:dy + new_h, dx:dx + new_w] = resized
    return canvas, scale, dx, dy
np.argmax(box_class_probs, axis=-1) + class_scores = box_class_probs[np.arange(len(class_ids)), class_ids] + scores = box_confidences * class_scores + + mask = scores >= OBJ_THRESH + if np.sum(mask) == 0: + return None + return True # 只需要判断是否有目标 + +def post_process(outputs, scale, dx, dy): + boxes_list, conf_list, class_list = [], [], [] + for i in range(3): + boxes_list.append(box_process(outputs[i*3])) + conf_list.append(outputs[i*3+2]) + class_list.append(outputs[i*3+1]) + + def flatten(x): + x = x.transpose(0,2,3,1) + return x.reshape(-1,x.shape[3]) + + boxes = np.concatenate([flatten(b) for b in boxes_list]) + box_conf = np.concatenate([flatten(c) for c in conf_list]) + class_probs = np.concatenate([flatten(c) for c in class_list]) + return filter_boxes(boxes, box_conf, class_probs) + +# ================== RKNN 初始化 ================== +rknn = RKNNLite() +if rknn.load_rknn(RKNN_MODEL) != 0: + raise RuntimeError("❌ RKNN 模型加载失败") +if rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO) != 0: + raise RuntimeError("❌ RKNN Runtime 初始化失败") +print("✅ RKNN 初始化完成") + +# ================== 视频流处理 ================== +max_retry_seconds = 10 +retry_interval_seconds = 1 + +last_gray = None +frame_count = 0 + +while True: + cap = cv2.VideoCapture(RTSP_URL) + start_time = time.time() + + while not cap.isOpened(): + if time.time() - start_time >= max_retry_seconds: + print("❌ 无法连接 RTSP") + exit(1) + time.sleep(retry_interval_seconds) + cap = cv2.VideoCapture(RTSP_URL) + + print("✅ 开始读取视频流") + + try: + while True: + ret, frame = cap.read() + if not ret: + print("❌ 读取失败") + break + + frame_count += 1 + + if SHOW_WINDOW: + cv2.imshow("Camera", frame) + if cv2.waitKey(1) == ord('q'): + raise KeyboardInterrupt + + if frame_count % SAVE_INTERVAL != 0: + continue + + print(f"处理帧 {frame_count}") + + # STEP1: 灰度过滤 + pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) + if is_large_gray(pil_image): + print("跳过:大面积灰色") + continue + + # STEP2: SSIM 去重 + gray = 
cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + if last_gray is not None: + sim = ssim(gray, last_gray) + if sim > SSIM_THRESHOLD: + print(f"跳过:SSIM={sim:.3f}") + continue + last_gray = gray.copy() + + # STEP3: RKNN 推理,只判断是否有 bag + img_resized, scale, dx, dy = letterbox_resize(frame, IMG_SIZE) + input_data = np.expand_dims(img_resized, 0) + outputs = rknn.inference(inputs=[input_data]) + has_bag = post_process(outputs, scale, dx, dy) + if not has_bag: + print("跳过:未检测到 bag") + continue + + # STEP4: 磁盘检查 + _, _, free = shutil.disk_usage(OUTPUT_DIR) + if free < 5*1024**3: + print("❌ 磁盘空间不足") + raise SystemExit(1) + + # STEP5: 保存原图 + ts = time.strftime("%Y%m%d_%H%M%S") + ms = int((time.time()%1)*1000) + filename = f"bag_{ts}_{ms:03d}.png" + path = os.path.join(OUTPUT_DIR, filename) + cv2.imwrite(path, frame) # 保存原图 + print(f"✅ 已保存: {path}") + + except KeyboardInterrupt: + print("\n🛑 用户中断") + break + + finally: + cap.release() + cv2.destroyAllWindows() + print(f"视频流关闭,共处理 {frame_count} 帧") + +rknn.release() +print("程序结束") + diff --git a/detect_image/image_02_3588.py b/detect_image/image_02_3588.py new file mode 100644 index 0000000..8c0a9a7 --- /dev/null +++ b/detect_image/image_02_3588.py @@ -0,0 +1,205 @@ +import cv2 +import time +import os +import numpy as np +from PIL import Image +from skimage.metrics import structural_similarity as ssim +from rknnlite.api import RKNNLite + +# ================== 配置 ================== +RTSP_URL = "rtsp://admin:XJ123456@192.168.250.60:554/streaming/channels/101" +RKNN_MODEL = "bag3588.rknn" +OUTPUT_DIR = "camera_event_capture" + +CONF_THRESHOLD = 0.5 +SSIM_THRESHOLD = 0.9 + +END_MISS_FRAMES = 30 # 连续多少帧未检测到 → 结束采集 +SAVE_EVERY_N_FRAMES = 1 # 采集中每 N 帧保存一次 +SHOW_WINDOW = False + +# 灰度判断参数 +GRAY_LOWER = 70 +GRAY_UPPER = 230 +GRAY_RATIO_THRESHOLD = 0.7 + +IMG_SIZE = (640, 640) +OBJ_THRESH = 0.001 +NMS_THRESH = 0.45 +CLASS_NAME = ["bag"] + +os.makedirs(OUTPUT_DIR, exist_ok=True) + +# ================== 灰度判断 ================== +def 
is_large_gray(image): + img = np.array(image) + if img.ndim != 3 or img.shape[2] != 3: + return True + h, w, _ = img.shape + gray_mask = ( + (img[:, :, 0] >= GRAY_LOWER) & (img[:, :, 0] <= GRAY_UPPER) & + (img[:, :, 1] >= GRAY_LOWER) & (img[:, :, 1] <= GRAY_UPPER) & + (img[:, :, 2] >= GRAY_LOWER) & (img[:, :, 2] <= GRAY_UPPER) + ) + return gray_mask.sum() / (h * w) > GRAY_RATIO_THRESHOLD + +# ================== RKNN 推理工具 ================== +def letterbox_resize(image, size, bg_color=114): + target_w, target_h = size + h, w = image.shape[:2] + scale = min(target_w / w, target_h / h) + new_w, new_h = int(w * scale), int(h * scale) + resized = cv2.resize(image, (new_w, new_h)) + canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8) + dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2 + canvas[dy:dy + new_h, dx:dx + new_w] = resized + return canvas, scale, dx, dy + +def dfl_numpy(position): + n, c, h, w = position.shape + p_num = 4 + mc = c // p_num + y = position.reshape(n, p_num, mc, h, w) + y = np.exp(y) / np.sum(np.exp(y), axis=2, keepdims=True) + acc = np.arange(mc).reshape(1,1,mc,1,1) + return np.sum(y * acc, axis=2) + +def box_process(position): + grid_h, grid_w = position.shape[2:4] + col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h)) + col = col.reshape(1,1,grid_h,grid_w) + row = row.reshape(1,1,grid_h,grid_w) + grid = np.concatenate((col,row), axis=1) + stride = np.array([IMG_SIZE[1]//grid_h, IMG_SIZE[0]//grid_w]).reshape(1,2,1,1) + position = dfl_numpy(position) + box_xy = grid + 0.5 - position[:,0:2,:,:] + box_xy2 = grid + 0.5 + position[:,2:4,:,:] + return np.concatenate((box_xy*stride, box_xy2*stride), axis=1) + +def filter_boxes(boxes, box_confidences, box_class_probs): + boxes = boxes.reshape(-1,4) + box_confidences = box_confidences.reshape(-1) + box_class_probs = np.array(box_class_probs) + + class_ids = np.argmax(box_class_probs, axis=-1) + class_scores = box_class_probs[np.arange(len(class_ids)), class_ids] + scores = 
box_confidences * class_scores + + mask = scores >= OBJ_THRESH + return np.sum(mask) > 0 # True: 有 bag, False: 无 bag + +def post_process(outputs, scale, dx, dy): + boxes_list, conf_list, class_list = [], [], [] + for i in range(3): + boxes_list.append(box_process(outputs[i*3])) + conf_list.append(outputs[i*3+2]) + class_list.append(outputs[i*3+1]) + + def flatten(x): + x = x.transpose(0,2,3,1) + return x.reshape(-1,x.shape[3]) + + boxes = np.concatenate([flatten(b) for b in boxes_list]) + box_conf = np.concatenate([flatten(c) for c in conf_list]) + class_probs = np.concatenate([flatten(c) for c in class_list]) + return filter_boxes(boxes, box_conf, class_probs) + +# ================== RKNN 初始化 ================== +rknn = RKNNLite() +if rknn.load_rknn(RKNN_MODEL) != 0: + raise RuntimeError("RKNN 模型加载失败") +if rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO) != 0: + raise RuntimeError("RKNN Runtime 初始化失败") +print("✅ RKNN 初始化完成") + +# ================== 视频流 ================== +cap = cv2.VideoCapture(RTSP_URL) +if not cap.isOpened(): + raise RuntimeError("RTSP 连接失败") +print("🎥 视频流已连接") + +# ================== 状态机 ================== +STATE_IDLE = 0 +STATE_CAPTURING = 1 + +state = STATE_IDLE +miss_count = 0 +save_idx = 0 +session_dir = None +session_id = 0 +last_gray = None +frame_count = 0 + +try: + while True: + ret, frame = cap.read() + if not ret: + time.sleep(0.5) + continue + + frame_count += 1 + + if SHOW_WINDOW: + cv2.imshow("Camera", frame) + if cv2.waitKey(1) == ord('q'): + break + + # ---------- 灰度过滤 ---------- + pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) + if is_large_gray(pil_image): + continue + + # ---------- SSIM 去重 ---------- + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + if last_gray is not None and state == STATE_IDLE: + sim = ssim(gray, last_gray) + if sim > SSIM_THRESHOLD: + continue + last_gray = gray.copy() + + # ---------- RKNN 推理判断是否有 bag ---------- + img_resized, scale, dx, dy = letterbox_resize(frame, IMG_SIZE) + 
input_data = np.expand_dims(img_resized, 0) + outputs = rknn.inference(inputs=[input_data]) + has_bag = post_process(outputs, scale, dx, dy) + + # ---------- 状态机 ---------- + if state == STATE_IDLE: + if has_bag: + session_id += 1 + ts = time.strftime("%Y%m%d_%H%M%S") + session_dir = os.path.join(OUTPUT_DIR, f"session_{session_id:04d}_{ts}") + os.makedirs(session_dir, exist_ok=True) + print(f"\n🚀 进入采集") + state = STATE_CAPTURING + miss_count = 0 + save_idx = 0 + + elif state == STATE_CAPTURING: + if has_bag: + miss_count = 0 + else: + miss_count += 1 + + if save_idx % SAVE_EVERY_N_FRAMES == 0: + ts = time.strftime("%Y%m%d_%H%M%S") + ms = int((time.time()%1)*1000) + fname = f"{save_idx:06d}_{ts}_{ms:03d}.png" + cv2.imwrite(os.path.join(session_dir, fname), frame) # 保存原图 + save_idx += 1 + + if miss_count >= END_MISS_FRAMES: + print(f"🛑 退出采集,本次保存 {save_idx} 帧") + state = STATE_IDLE + miss_count = 0 + session_dir = None + +except KeyboardInterrupt: + print("\n🛑 用户退出") + +finally: + cap.release() + cv2.destroyAllWindows() + rknn.release() + print("程序结束") +