更新料带目标检测,判断料带到位逻辑
This commit is contained in:
93
ailai_pc/chose_ROI.py
Normal file
93
ailai_pc/chose_ROI.py
Normal file
@ -0,0 +1,93 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
# Module-level state shared by the mouse callback and the key loop
drawing = False               # True while a drag is in progress
ix = iy = -1                  # anchor point of the current drag
roi_list = list()             # confirmed ROIs, one (x, y, w, h) tuple each
image_path = "1.jpg"          # <<< change to your own image path
save_dir = "./roi_111/1.txt"  # used as a DIRECTORY (despite the .txt suffix)

# Create the output directory up front
os.makedirs(save_dir, exist_ok=True)
|
||||
|
||||
def draw_rectangle(event, x, y, flags, param):
    """Mouse callback that lets the user drag out rectangles on the image.

    Left-button-down stores the anchor point, mouse-move redraws a preview,
    and left-button-up appends the finished rectangle to ``roi_list`` as
    ``(x, y, w, h)`` with positive width and height.

    Args:
        event: OpenCV mouse event code.
        x, y: cursor position reported with the event.
        flags, param: unused; present because the callback signature needs them.
    """
    global ix, iy, drawing, img_copy, roi_list

    def _fresh_canvas():
        # Start from the untouched image and repaint every confirmed ROI, so
        # earlier selections stay visible while a new one is being dragged.
        canvas = img.copy()
        for rx, ry, rw, rh in roi_list:
            cv2.rectangle(canvas, (rx, ry), (rx + rw, ry + rh), (0, 255, 0), 2)
        return canvas

    if event == cv2.EVENT_LBUTTONDOWN:
        drawing = True
        ix, iy = x, y

    elif event == cv2.EVENT_MOUSEMOVE:
        if drawing:
            img_copy = _fresh_canvas()
            cv2.rectangle(img_copy, (ix, iy), (x, y), (0, 255, 0), 2)
            cv2.imshow("Select ROI", img_copy)

    elif event == cv2.EVENT_LBUTTONUP:
        if not drawing:
            # Button released without a matching press: there is no valid
            # anchor, so do not record a rectangle.
            return
        drawing = False
        w = x - ix
        h = y - iy
        if w != 0 and h != 0:
            # Normalise so width/height are positive whatever the drag direction
            x_start = min(ix, x)
            y_start = min(iy, y)
            w = abs(w)
            h = abs(h)
            roi_list.append((x_start, y_start, w, h))
            # Repaint from scratch so no stale preview rectangle is left behind
            img_copy = _fresh_canvas()
            cv2.imshow("Select ROI", img_copy)
            print(f"已选择 ROI: (x={x_start}, y={y_start}, w={w}, h={h})")
|
||||
|
||||
# Helper that writes ROI coordinates to a .txt file
def save_rois_to_txt(rois, filepath):
    """Write every ROI in ``rois`` to ``filepath``, one comma-separated line each.

    Args:
        rois: iterable of ROIs; each ROI is an iterable whose items are
            converted with ``str`` and joined by commas.
        filepath: path of the text file to (over)write.
    """
    with open(filepath, 'w') as out:
        out.writelines(
            ','.join(str(value) for value in roi) + '\n'
            for roi in rois
        )
    print(f"💾 ROI 坐标已保存至: {filepath}")
|
||||
|
||||
def select_roi(image_path):
    """Show ``image_path`` in a window and collect ROIs drawn with the mouse.

    Keys handled in the loop:
        's'  write the current ``roi_list`` to ``<save_dir>/<image stem>_rois1.txt``
        'n'  leave the loop (placeholder for moving to the next image)
        'q'  leave the loop and quit

    Returns None in every case; an unreadable image is reported and skipped.
    """
    global img, img_copy

    window = "Select ROI"

    img = cv2.imread(image_path)
    if img is None:
        print(f"❌ 无法读取图像: {image_path}")
        return

    img_copy = img.copy()
    cv2.namedWindow(window)
    cv2.setMouseCallback(window, draw_rectangle)

    for hint in (
        "📌 使用鼠标左键拖拽选择 ROI",
        "✅ 选择完成后按 's' 键保存坐标",
        "⏭️ 按 'n' 键跳过/下一步(可自定义)",
        "🚪 按 'q' 键退出",
    ):
        print(hint)

    # Both keys end the loop; only the message differs
    exit_messages = {
        ord('n'): "⏭️ 跳到下一张图片(此处可扩展)",
        ord('q'): "👋 退出程序",
    }

    while True:
        cv2.imshow(window, img_copy)
        key = cv2.waitKey(1) & 0xFF

        if key == ord('s'):
            # Name the output after the image file, without its extension
            stem = os.path.splitext(os.path.basename(image_path))[0]
            target = os.path.join(save_dir, f"{stem}_rois1.txt")
            save_rois_to_txt(roi_list, target)
        elif key in exit_messages:
            print(exit_messages[key])
            break

    cv2.destroyAllWindows()
|
||||
|
||||
if __name__ == "__main__":
|
||||
select_roi(image_path)
|
||||
65
ailai_pc/divid_conf—box.py
Normal file
65
ailai_pc/divid_conf—box.py
Normal file
@ -0,0 +1,65 @@
|
||||
import cv2
|
||||
import os
|
||||
import shutil
|
||||
from ultralytics import YOLO
|
||||
|
||||
# ====================== Configuration ======================
MODEL_PATH = 'point.pt'
IMAGE_SOURCE_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/ailaipoint'

OUTPUT_ROOT = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/ailaipoint/train_split'
OUTPUT_DIR_0 = os.path.join(OUTPUT_ROOT, '0')  # no detection / conf == 0
OUTPUT_DIR_1 = os.path.join(OUTPUT_ROOT, '1')  # 0 < conf < 0.5
OUTPUT_DIR_2 = os.path.join(OUTPUT_ROOT, '2')  # conf >= 0.5

for bucket_dir in (OUTPUT_DIR_0, OUTPUT_DIR_1, OUTPUT_DIR_2):
    os.makedirs(bucket_dir, exist_ok=True)

IMG_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp'}

# ====================== Main ======================
if __name__ == "__main__":
    print("🚀 bbox 置信度分桶(移动原图,含无目标图像)")

    model = YOLO(MODEL_PATH)

    # Keep only directory entries whose (lower-cased) extension is an image type
    image_files = []
    for entry in os.listdir(IMAGE_SOURCE_DIR):
        suffix = os.path.splitext(entry.lower())[1]
        if suffix in IMG_EXTENSIONS:
            image_files.append(entry)

    print(f"📸 找到图片 {len(image_files)} 张")

    for img_name in image_files:
        src_path = os.path.join(IMAGE_SOURCE_DIR, img_name)

        img = cv2.imread(src_path)
        if img is None:
            continue

        results = model(img, verbose=False)

        # An image with no detection at all is treated as conf = 0;
        # otherwise the confidence of the first returned box is used.
        boxes = results[0].boxes if results else None
        if boxes is None or len(boxes.conf) == 0:
            bbox_conf = 0.0
        else:
            bbox_conf = float(boxes.conf[0].cpu().item())

        # Pick the bucket directory from the confidence
        if bbox_conf >= 0.5:
            dst_dir = OUTPUT_DIR_2
        elif bbox_conf == 0:
            dst_dir = OUTPUT_DIR_0
        else:
            dst_dir = OUTPUT_DIR_1

        # Move (not copy) the original file into its bucket
        dst_path = os.path.join(dst_dir, img_name)
        shutil.move(src_path, dst_path)

        print(f"{img_name} -> conf={bbox_conf:.3f} -> {os.path.basename(dst_dir)}")

    print("✅ 完成(含无目标图片)")
|
||||
@ -6,8 +6,8 @@ from ultralytics import YOLO
|
||||
# ====================== 用户配置 ======================
|
||||
#MODEL_PATH = '/home/hx/yolo/ultralytics_yolo11-main/runs/train/exp_ailai2/weights/best.pt'
|
||||
MODEL_PATH = 'point.pt'
|
||||
IMAGE_SOURCE_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251212' # 验证集图片目录
|
||||
LABEL_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251212' # 标签目录(与图片同名 .txt)
|
||||
IMAGE_SOURCE_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251214' # 验证集图片目录
|
||||
LABEL_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/1/20251214' # 标签目录(与图片同名 .txt)
|
||||
OUTPUT_DIR = './output_images'
|
||||
|
||||
|
||||
|
||||
0
ailai_pc/roi_111/1.txt/1_rois1.txt
Normal file
0
ailai_pc/roi_111/1.txt/1_rois1.txt
Normal file
BIN
detect_image/1.jpg
Normal file
BIN
detect_image/1.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 333 KiB |
BIN
detect_image/2.jpg
Normal file
BIN
detect_image/2.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 860 KiB |
BIN
detect_image/3.jpg
Normal file
BIN
detect_image/3.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 725 KiB |
BIN
detect_image/bag3568.rknn
Normal file
BIN
detect_image/bag3568.rknn
Normal file
Binary file not shown.
BIN
detect_image/bag3588.rknn
Normal file
BIN
detect_image/bag3588.rknn
Normal file
Binary file not shown.
166
detect_image/bag_detect.py
Normal file
166
detect_image/bag_detect.py
Normal file
@ -0,0 +1,166 @@
|
||||
import os
|
||||
import cv2
|
||||
import numpy as np
|
||||
from rknnlite.api import RKNNLite
|
||||
|
||||
# ====================== Configuration ======================
MODEL_PATH = "bag3588.rknn"   # RKNN model file
IMG_PATH = "2.jpg"            # image to run inference on
IMG_SIZE = (640, 640)         # model input size (w, h)
OBJ_THRESH = 0.001            # boxes scoring below this are dropped
NMS_THRESH = 0.45             # overlap above this suppresses the weaker box
CLASS_NAME = ["bag"]
OUTPUT_DIR = "./result"

# Annotated images are written here; make sure it exists
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
# ====================== 工具函数 ======================
|
||||
def letterbox_resize(image, size, bg_color=114):
    """Scale ``image`` to fit inside ``size`` and pad the remainder.

    Args:
        image: array whose first two dimensions are (height, width).
        size: ``(width, height)`` of the output canvas.
        bg_color: value used to fill the padded area.

    Returns:
        ``(canvas, scale, dx, dy)``: the padded uint8 image, the resize
        factor that was applied, and the x/y offset of the resized image
        inside the canvas.
    """
    out_w, out_h = size
    src_h, src_w = image.shape[:2]

    # One factor for both axes keeps the aspect ratio
    scale = min(out_w / src_w, out_h / src_h)
    fit_w = int(src_w * scale)
    fit_h = int(src_h * scale)
    shrunk = cv2.resize(image, (fit_w, fit_h))

    # Offsets that centre the resized image on the canvas
    dx = (out_w - fit_w) // 2
    dy = (out_h - fit_h) // 2

    canvas = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)
    canvas[dy:dy + fit_h, dx:dx + fit_w] = shrunk
    return canvas, scale, dx, dy
|
||||
|
||||
def dfl_numpy(position):
    """Collapse per-side bin logits into one expected value per side.

    ``position`` of shape ``(n, c, h, w)`` is viewed as
    ``(n, 4, c // 4, h, w)``. A softmax is taken over the ``c // 4`` bins of
    each of the 4 groups and the result is the softmax-weighted mean of the
    bin indices.

    Returns:
        Array of shape ``(n, 4, h, w)``.
    """
    n, c, h, w = position.shape
    p_num = 4
    mc = c // p_num
    y = position.reshape(n, p_num, mc, h, w)

    # Shift by the per-distribution maximum before exponentiating. The softmax
    # value is mathematically unchanged, but exp() can no longer overflow to
    # inf (which previously turned the result into NaN for large logits).
    y = y - np.max(y, axis=2, keepdims=True)
    weights = np.exp(y)
    y = weights / np.sum(weights, axis=2, keepdims=True)

    acc = np.arange(mc).reshape(1, 1, mc, 1, 1)
    return np.sum(y * acc, axis=2)
|
||||
|
||||
def box_process(position):
    """Convert one branch's raw box tensor into corner coordinates.

    The grid size is read from ``position.shape[2:4]``. The decoded
    distances from ``dfl_numpy`` are subtracted from / added to the cell
    centres and the result is multiplied by the stride derived from
    ``IMG_SIZE``.

    Returns:
        Array with 4 channels on axis 1: the two "minus" coordinates followed
        by the two "plus" coordinates.
    """
    grid_h, grid_w = position.shape[2:4]

    # Cell indices: channel 0 holds the column index, channel 1 the row index
    cols, rows = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
    grid = np.stack((cols, rows), axis=0)[np.newaxis]  # (1, 2, grid_h, grid_w)

    stride = np.array([IMG_SIZE[1] // grid_h, IMG_SIZE[0] // grid_w]).reshape(1, 2, 1, 1)

    dist = dfl_numpy(position)
    centre = grid + 0.5
    xy_min = (centre - dist[:, 0:2, :, :]) * stride
    xy_max = (centre + dist[:, 2:4, :, :]) * stride
    return np.concatenate((xy_min, xy_max), axis=1)
|
||||
|
||||
def filter_boxes(boxes, box_confidences, box_class_probs):
    """Threshold candidate boxes by score, then apply greedy NMS.

    The score of a candidate is its confidence times its best class
    probability. Candidates below ``OBJ_THRESH`` are dropped; the rest are
    visited from highest to lowest score and any box overlapping a kept box
    by more than ``NMS_THRESH`` is discarded.

    Returns:
        ``(boxes, classes, scores, conf_keep)`` for the surviving boxes, or
        four ``None`` values when nothing passes the score threshold.
    """
    boxes = np.array(boxes).reshape(-1, 4)
    box_confidences = np.array(box_confidences).reshape(-1)
    box_class_probs = np.array(box_class_probs)

    class_ids = np.argmax(box_class_probs, axis=-1)
    best_prob = box_class_probs[np.arange(len(class_ids)), class_ids]
    scores = box_confidences * best_prob

    selected = scores >= OBJ_THRESH
    if not selected.any():
        return None, None, None, None

    boxes = boxes[selected]
    classes = class_ids[selected]
    scores = scores[selected]
    conf_keep = box_confidences[selected]  # raw confidence, before the class factor

    # Greedy NMS
    left, top, right, bottom = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (right - left + 1) * (bottom - top + 1)

    keep = []
    order = scores.argsort()[::-1]
    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(best)

        # Intersection of the best box with every remaining box
        inter_w = np.maximum(
            0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 1
        )
        inter_h = np.maximum(
            0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 1
        )
        inter = inter_w * inter_h
        overlap = inter / (areas[best] + areas[rest] - inter)

        # Only boxes that do not overlap too much stay in the queue
        order = rest[overlap <= NMS_THRESH]

    return boxes[keep], classes[keep], scores[keep], conf_keep[keep]
|
||||
|
||||
def post_process(outputs, scale, dx, dy):
    """Decode the model outputs into boxes in original-image coordinates.

    ``outputs`` is read as three branches of three tensors each: index
    ``3*b`` is the box tensor, ``3*b + 1`` the class probabilities and
    ``3*b + 2`` the confidence. After ``filter_boxes`` the letterbox offset
    (``dx``, ``dy``) and ``scale`` are undone and negatives are clipped to 0.

    Returns:
        ``(boxes, classes, scores, conf_keep)`` or four ``None`` values when
        nothing is detected. ``scores`` is returned as ``1 - score`` and
        ``conf_keep`` is multiplied by 255.
    """
    def flatten(x):
        # (n, ch, h, w) -> (n*h*w, ch)
        ch = x.shape[1]
        return x.transpose(0, 2, 3, 1).reshape(-1, ch)

    branch_num = 3
    branches = range(branch_num)
    boxes = np.concatenate([flatten(box_process(outputs[3 * b])) for b in branches])
    box_conf = np.concatenate([flatten(outputs[3 * b + 2]) for b in branches])
    class_probs = np.concatenate([flatten(outputs[3 * b + 1]) for b in branches])

    boxes, classes, scores, conf_keep = filter_boxes(boxes, box_conf, class_probs)
    if boxes is None:
        return None, None, None, None

    # Undo the letterbox: remove the padding offset, then the resize factor
    boxes[:, [0, 2]] -= dx
    boxes[:, [1, 3]] -= dy
    boxes /= scale
    boxes = boxes.clip(min=0)

    # NOTE(review): the score handed back is the complement of the filtered
    # score — confirm this inversion is intended.
    scores = 1 - scores
    # The kept confidence is scaled by 255
    conf_keep = conf_keep * 255
    return boxes, classes, scores, conf_keep
|
||||
|
||||
# ====================== 单张图片推理 ======================
|
||||
def detect_single_image(img_path):
    """Run the RKNN model on one image and save an annotated copy.

    Detected boxes are drawn on the image, which is written to
    ``OUTPUT_DIR``. When detections exist, their ``conf_keep`` values are
    appended to the file name as ``_conf_<v1>_<v2>...``.

    Args:
        img_path: path of the image to process.

    Raises:
        FileNotFoundError: the image cannot be read.
        RuntimeError: loading the model or initialising the runtime returned
            a non-zero code.
    """
    img_name = os.path.basename(img_path)
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f"图片无法读取: {img_path}")

    rknn = RKNNLite(verbose=False)
    try:
        # Fail loudly instead of running inference on a half-initialised runtime
        if rknn.load_rknn(MODEL_PATH) != 0:
            raise RuntimeError(f"RKNN 模型加载失败: {MODEL_PATH}")
        if rknn.init_runtime() != 0:
            raise RuntimeError("RKNN Runtime 初始化失败")

        img_resized, scale, dx, dy = letterbox_resize(img, IMG_SIZE)
        input_data = np.expand_dims(img_resized, 0)
        outputs = rknn.inference(inputs=[input_data])
        boxes, classes, scores, conf_keep = post_process(outputs, scale, dx, dy)

        if boxes is not None:
            for i, box in enumerate(boxes):
                x1, y1, x2, y2 = box.astype(int)
                cls_id = classes[i]
                score = scores[i]
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(
                    img,
                    f"{CLASS_NAME[cls_id]}:{score:.1f}",
                    (x1, max(y1 - 5, 0)),  # keep the label inside the image
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6,
                    (0, 255, 0),
                    2,
                )

        # Build the output name; encode the confidences when there are any
        if conf_keep is not None and len(conf_keep) > 0:
            score_strs = ["{:.0f}".format(s) for s in conf_keep]
            name_root, ext = os.path.splitext(img_name)
            new_name = name_root + "_conf_" + "_".join(score_strs) + ext
        else:
            new_name = img_name

        save_path = os.path.join(OUTPUT_DIR, new_name)
        cv2.imwrite(save_path, img)
        print(f"{img_name} 推理完成,结果保存到: {save_path}")
    finally:
        # Always give the runtime back, also when something above raised
        rknn.release()
|
||||
|
||||
# ====================== Entry point ======================
# Guarded so that importing this module does not trigger an inference run.
if __name__ == "__main__":
    detect_single_image(IMG_PATH)
|
||||
|
||||
77
detect_image/bag_judgment.py
Normal file
77
detect_image/bag_judgment.py
Normal file
@ -0,0 +1,77 @@
|
||||
import cv2
|
||||
from detect_bag import detect_bag
|
||||
|
||||
THRESHOLD_X = 537  # min_x threshold: at or above it the bag counts as in position


def bag_judgment(img, return_conf=True, return_vis=False):
    """Decide whether the bag seen in ``img`` has reached its position.

    ``detect_bag`` is called with the same flags and its result is unpacked
    according to the flags and to how many items it returned.

    Args:
        img (np.ndarray): image to inspect.
        return_conf (bool): ask ``detect_bag`` for the confidence.
        return_vis (bool): ask ``detect_bag`` for a visualisation image.

    Returns:
        tuple ``(status_bool, status_text, conf, min_x, vis_img)``:
            status_bool: True = in position, False = not in position,
                None = nothing detected (``min_x`` is None).
            status_text: the status as a Chinese string.
            conf: confidence, or None when not available.
            min_x: left-most x coordinate, or None.
            vis_img: visualisation image, or None.
    """
    # numpy is not imported at module level in this file; without this import
    # the np.number check in the last branch raises NameError.
    import numpy as np

    outputs = detect_bag(img, return_conf=return_conf, return_vis=return_vis)

    # Placeholders for anything detect_bag does not provide
    conf = None
    min_x = None
    vis_img = None

    # Unpack depending on the flags and on the number of returned items
    if return_conf and return_vis:
        if len(outputs) == 3:
            conf, min_x, vis_img = outputs
        elif len(outputs) == 2:
            conf, min_x = outputs
        elif len(outputs) == 1:
            min_x = outputs[0]
    elif return_conf and not return_vis:
        if len(outputs) >= 2:
            conf, min_x = outputs[:2]
        elif len(outputs) == 1:
            min_x = outputs[0]
    elif not return_conf and return_vis:
        if len(outputs) == 2:
            min_x, vis_img = outputs
        elif len(outputs) == 1:
            min_x = outputs[0]
    else:
        # A bare number is taken as min_x itself, otherwise the first item is used
        min_x = outputs if isinstance(outputs, (int, float, np.number)) else outputs[0]

    # Map min_x to a status
    if min_x is None:
        status_bool = None
        status_text = "没有料袋"
    elif min_x >= THRESHOLD_X:
        status_bool = True
        status_text = "料袋到位"
    else:
        status_bool = False
        status_text = "料袋未到位"

    return status_bool, status_text, conf, min_x, vis_img
|
||||
|
||||
|
||||
# ====================== Manual test ======================
if __name__ == "__main__":
    IMG_PATH = "3.jpg"

    img = cv2.imread(IMG_PATH)
    if img is None:
        raise FileNotFoundError(f"图片无法读取: {IMG_PATH}")

    # Ask for everything so both the numbers and the picture can be checked
    verdict = bag_judgment(img, return_conf=True, return_vis=True)
    status_bool, status_text, conf, min_x, vis_img = verdict
    print(f"判断结果: {status_bool}, 中文状态: {status_text}, conf={conf}, min_x={min_x}")

    if vis_img is not None:
        cv2.imshow("Vis", vis_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
|
||||
|
||||
202
detect_image/capture-image_1.py
Normal file
202
detect_image/capture-image_1.py
Normal file
@ -0,0 +1,202 @@
|
||||
import cv2
|
||||
import time
|
||||
import os
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from skimage.metrics import structural_similarity as ssim
|
||||
import shutil
|
||||
from rknnlite.api import RKNNLite
|
||||
|
||||
# ================== Configuration ==================
# SECURITY NOTE: the default below embeds camera credentials in source.
# Set the RTSP_URL environment variable to override it without editing the file.
RTSP_URL = os.environ.get(
    "RTSP_URL",
    "rtsp://admin:ailaimiye123@192.168.0.234:554/streaming/channels/101",
)
SAVE_INTERVAL = 15        # only every N-th frame is processed
SSIM_THRESHOLD = 0.9      # frames more similar than this to the last kept one are skipped
OUTPUT_DIR = "camera_test"
RKNN_MODEL = "bag3568.rknn"
SHOW_WINDOW = False

# Parameters of the "mostly gray frame" check
GRAY_LOWER = 70
GRAY_UPPER = 230
GRAY_RATIO_THRESHOLD = 0.7

IMG_SIZE = (640, 640)
OBJ_THRESH = 0.001
NMS_THRESH = 0.45
CLASS_NAME = ["bag"]

os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
# ================== 灰度判断 ==================
|
||||
def is_large_gray(image):
    """Tell whether most of ``image`` falls inside the configured gray band.

    A pixel counts when all three channels lie within
    ``[GRAY_LOWER, GRAY_UPPER]``. The function returns True when the share
    of such pixels exceeds ``GRAY_RATIO_THRESHOLD``. Input that is not a
    3-channel image is reported as True as well.
    """
    pixels = np.array(image)
    if not (pixels.ndim == 3 and pixels.shape[2] == 3):
        return True

    height, width = pixels.shape[:2]
    # True where every channel of the pixel is inside the band
    in_band = np.all((pixels >= GRAY_LOWER) & (pixels <= GRAY_UPPER), axis=2)
    return in_band.sum() / (height * width) > GRAY_RATIO_THRESHOLD
|
||||
|
||||
# ================== RKNN 工具函数 ==================
|
||||
def letterbox_resize(image, size, bg_color=114):
    """Scale ``image`` to fit inside ``size`` and pad the remainder.

    Args:
        image: array whose first two dimensions are (height, width).
        size: ``(width, height)`` of the output canvas.
        bg_color: value used to fill the padded area.

    Returns:
        ``(canvas, scale, dx, dy)``: the padded uint8 image, the resize
        factor that was applied, and the x/y offset of the resized image
        inside the canvas.
    """
    out_w, out_h = size
    src_h, src_w = image.shape[:2]

    # One factor for both axes keeps the aspect ratio
    scale = min(out_w / src_w, out_h / src_h)
    fit_w = int(src_w * scale)
    fit_h = int(src_h * scale)
    shrunk = cv2.resize(image, (fit_w, fit_h))

    # Offsets that centre the resized image on the canvas
    dx = (out_w - fit_w) // 2
    dy = (out_h - fit_h) // 2

    canvas = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)
    canvas[dy:dy + fit_h, dx:dx + fit_w] = shrunk
    return canvas, scale, dx, dy
|
||||
|
||||
def dfl_numpy(position):
    """Collapse per-side bin logits into one expected value per side.

    ``position`` of shape ``(n, c, h, w)`` is viewed as
    ``(n, 4, c // 4, h, w)``. A softmax is taken over the ``c // 4`` bins of
    each of the 4 groups and the result is the softmax-weighted mean of the
    bin indices.

    Returns:
        Array of shape ``(n, 4, h, w)``.
    """
    n, c, h, w = position.shape
    p_num = 4
    mc = c // p_num
    y = position.reshape(n, p_num, mc, h, w)

    # Shift by the per-distribution maximum before exponentiating. The softmax
    # value is mathematically unchanged, but exp() can no longer overflow to
    # inf (which previously turned the result into NaN for large logits).
    y = y - np.max(y, axis=2, keepdims=True)
    weights = np.exp(y)
    y = weights / np.sum(weights, axis=2, keepdims=True)

    acc = np.arange(mc).reshape(1, 1, mc, 1, 1)
    return np.sum(y * acc, axis=2)
|
||||
|
||||
def box_process(position):
    """Convert one branch's raw box tensor into corner coordinates.

    The grid size is read from ``position.shape[2:4]``. The decoded
    distances from ``dfl_numpy`` are subtracted from / added to the cell
    centres and the result is multiplied by the stride derived from
    ``IMG_SIZE``.

    Returns:
        Array with 4 channels on axis 1: the two "minus" coordinates followed
        by the two "plus" coordinates.
    """
    grid_h, grid_w = position.shape[2:4]

    # Cell indices: channel 0 holds the column index, channel 1 the row index
    cols, rows = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
    grid = np.stack((cols, rows), axis=0)[np.newaxis]  # (1, 2, grid_h, grid_w)

    stride = np.array([IMG_SIZE[1] // grid_h, IMG_SIZE[0] // grid_w]).reshape(1, 2, 1, 1)

    dist = dfl_numpy(position)
    centre = grid + 0.5
    xy_min = (centre - dist[:, 0:2, :, :]) * stride
    xy_max = (centre + dist[:, 2:4, :, :]) * stride
    return np.concatenate((xy_min, xy_max), axis=1)
|
||||
|
||||
def filter_boxes(boxes, box_confidences, box_class_probs):
    """Report whether any candidate reaches ``OBJ_THRESH``.

    The score of a candidate is its confidence times its best class
    probability. Only presence is needed here, so no boxes are returned.

    Returns:
        True when at least one score is >= ``OBJ_THRESH``, otherwise None.
    """
    # The reshaped boxes are not used further; the call is kept so malformed
    # input still fails here exactly as before.
    boxes = boxes.reshape(-1, 4)
    flat_conf = box_confidences.reshape(-1)
    probs = np.array(box_class_probs)

    best_class = np.argmax(probs, axis=-1)
    best_prob = probs[np.arange(len(best_class)), best_class]
    scores = flat_conf * best_prob

    hits = scores >= OBJ_THRESH
    return True if np.sum(hits) != 0 else None
|
||||
|
||||
def post_process(outputs, scale, dx, dy):
    """Flatten the three output branches and hand them to ``filter_boxes``.

    ``outputs`` is read as three branches of three tensors each: index
    ``3*b`` is the box tensor, ``3*b + 1`` the class probabilities and
    ``3*b + 2`` the confidence. ``scale``, ``dx`` and ``dy`` are accepted
    but not used in this variant.

    Returns:
        Whatever ``filter_boxes`` returns.
    """
    def flatten(x):
        # (n, ch, h, w) -> (n*h*w, ch)
        nhwc = x.transpose(0, 2, 3, 1)
        return nhwc.reshape(-1, nhwc.shape[3])

    branches = range(3)
    boxes = np.concatenate([flatten(box_process(outputs[3 * b])) for b in branches])
    box_conf = np.concatenate([flatten(outputs[3 * b + 2]) for b in branches])
    class_probs = np.concatenate([flatten(outputs[3 * b + 1]) for b in branches])
    return filter_boxes(boxes, box_conf, class_probs)
|
||||
|
||||
# ================== RKNN initialisation ==================
rknn = RKNNLite()
if rknn.load_rknn(RKNN_MODEL) != 0:
    raise RuntimeError("❌ RKNN 模型加载失败")
if rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO) != 0:
    raise RuntimeError("❌ RKNN Runtime 初始化失败")
print("✅ RKNN 初始化完成")

# ================== Video stream processing ==================
max_retry_seconds = 10
retry_interval_seconds = 1

last_gray = None
frame_count = 0

# The outer try/finally guarantees the NPU runtime is released on every exit
# path, including the SystemExit raised for "RTSP unreachable" / "disk full".
try:
    while True:
        cap = cv2.VideoCapture(RTSP_URL)
        start_time = time.time()

        # Keep retrying the connection for at most max_retry_seconds
        while not cap.isOpened():
            if time.time() - start_time >= max_retry_seconds:
                print("❌ 无法连接 RTSP")
                # SystemExit instead of exit(): exit() is a site-module helper
                raise SystemExit(1)
            time.sleep(retry_interval_seconds)
            cap.release()  # drop the failed handle before trying again
            cap = cv2.VideoCapture(RTSP_URL)

        print("✅ 开始读取视频流")

        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    # Leave the inner loop; the outer loop reconnects
                    print("❌ 读取失败")
                    break

                frame_count += 1

                if SHOW_WINDOW:
                    cv2.imshow("Camera", frame)
                    if cv2.waitKey(1) == ord('q'):
                        raise KeyboardInterrupt

                # Only every SAVE_INTERVAL-th frame is examined
                if frame_count % SAVE_INTERVAL != 0:
                    continue

                print(f"处理帧 {frame_count}")

                # STEP1: skip frames that are mostly gray
                pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                if is_large_gray(pil_image):
                    print("跳过:大面积灰色")
                    continue

                # STEP2: skip frames too similar to the last kept one (SSIM)
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                if last_gray is not None:
                    sim = ssim(gray, last_gray)
                    if sim > SSIM_THRESHOLD:
                        print(f"跳过:SSIM={sim:.3f}")
                        continue
                last_gray = gray.copy()

                # STEP3: RKNN inference, only to learn whether a bag is present
                img_resized, scale, dx, dy = letterbox_resize(frame, IMG_SIZE)
                input_data = np.expand_dims(img_resized, 0)
                outputs = rknn.inference(inputs=[input_data])
                has_bag = post_process(outputs, scale, dx, dy)
                if not has_bag:
                    print("跳过:未检测到 bag")
                    continue

                # STEP4: stop when less than 5 GiB is free
                _, _, free = shutil.disk_usage(OUTPUT_DIR)
                if free < 5 * 1024 ** 3:
                    print("❌ 磁盘空间不足")
                    raise SystemExit(1)

                # STEP5: save the original frame under a timestamped name
                ts = time.strftime("%Y%m%d_%H%M%S")
                ms = int((time.time() % 1) * 1000)
                filename = f"bag_{ts}_{ms:03d}.png"
                path = os.path.join(OUTPUT_DIR, filename)
                cv2.imwrite(path, frame)
                print(f"✅ 已保存: {path}")

        except KeyboardInterrupt:
            print("\n🛑 用户中断")
            break

        finally:
            cap.release()
            cv2.destroyAllWindows()
            print(f"视频流关闭,共处理 {frame_count} 帧")
finally:
    rknn.release()

print("程序结束")
|
||||
|
||||
205
detect_image/capture-image_2.py
Normal file
205
detect_image/capture-image_2.py
Normal file
@ -0,0 +1,205 @@
|
||||
import cv2
|
||||
import time
|
||||
import os
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from skimage.metrics import structural_similarity as ssim
|
||||
from rknnlite.api import RKNNLite
|
||||
|
||||
# ================== Configuration ==================
# SECURITY NOTE: the default below embeds camera credentials in source.
# Set the RTSP_URL environment variable to override it without editing the file.
RTSP_URL = os.environ.get(
    "RTSP_URL",
    "rtsp://admin:ailaimiye123@192.168.0.234:554/streaming/channels/101",
)
RKNN_MODEL = "bag3568.rknn"
OUTPUT_DIR = "camera_event_capture"

CONF_THRESHOLD = 0.5
SSIM_THRESHOLD = 0.9

END_MISS_FRAMES = 30       # consecutive frames without a detection that end a capture
SAVE_EVERY_N_FRAMES = 1    # while capturing, save one frame out of every N
SHOW_WINDOW = False

# Parameters of the "mostly gray frame" check
GRAY_LOWER = 70
GRAY_UPPER = 230
GRAY_RATIO_THRESHOLD = 0.7

IMG_SIZE = (640, 640)
OBJ_THRESH = 0.001
NMS_THRESH = 0.45
CLASS_NAME = ["bag"]

os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
# ================== 灰度判断 ==================
|
||||
def is_large_gray(image):
    """Tell whether most of ``image`` falls inside the configured gray band.

    A pixel counts when all three channels lie within
    ``[GRAY_LOWER, GRAY_UPPER]``. The function returns True when the share
    of such pixels exceeds ``GRAY_RATIO_THRESHOLD``. Input that is not a
    3-channel image is reported as True as well.
    """
    pixels = np.array(image)
    if not (pixels.ndim == 3 and pixels.shape[2] == 3):
        return True

    height, width = pixels.shape[:2]
    # True where every channel of the pixel is inside the band
    in_band = np.all((pixels >= GRAY_LOWER) & (pixels <= GRAY_UPPER), axis=2)
    return in_band.sum() / (height * width) > GRAY_RATIO_THRESHOLD
|
||||
|
||||
# ================== RKNN 推理工具 ==================
|
||||
def letterbox_resize(image, size, bg_color=114):
    """Scale ``image`` to fit inside ``size`` and pad the remainder.

    Args:
        image: array whose first two dimensions are (height, width).
        size: ``(width, height)`` of the output canvas.
        bg_color: value used to fill the padded area.

    Returns:
        ``(canvas, scale, dx, dy)``: the padded uint8 image, the resize
        factor that was applied, and the x/y offset of the resized image
        inside the canvas.
    """
    out_w, out_h = size
    src_h, src_w = image.shape[:2]

    # One factor for both axes keeps the aspect ratio
    scale = min(out_w / src_w, out_h / src_h)
    fit_w = int(src_w * scale)
    fit_h = int(src_h * scale)
    shrunk = cv2.resize(image, (fit_w, fit_h))

    # Offsets that centre the resized image on the canvas
    dx = (out_w - fit_w) // 2
    dy = (out_h - fit_h) // 2

    canvas = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)
    canvas[dy:dy + fit_h, dx:dx + fit_w] = shrunk
    return canvas, scale, dx, dy
|
||||
|
||||
def dfl_numpy(position):
    """Collapse per-side bin logits into one expected value per side.

    ``position`` of shape ``(n, c, h, w)`` is viewed as
    ``(n, 4, c // 4, h, w)``. A softmax is taken over the ``c // 4`` bins of
    each of the 4 groups and the result is the softmax-weighted mean of the
    bin indices.

    Returns:
        Array of shape ``(n, 4, h, w)``.
    """
    n, c, h, w = position.shape
    p_num = 4
    mc = c // p_num
    y = position.reshape(n, p_num, mc, h, w)

    # Shift by the per-distribution maximum before exponentiating. The softmax
    # value is mathematically unchanged, but exp() can no longer overflow to
    # inf (which previously turned the result into NaN for large logits).
    y = y - np.max(y, axis=2, keepdims=True)
    weights = np.exp(y)
    y = weights / np.sum(weights, axis=2, keepdims=True)

    acc = np.arange(mc).reshape(1, 1, mc, 1, 1)
    return np.sum(y * acc, axis=2)
|
||||
|
||||
def box_process(position):
    """Convert one branch's raw box tensor into corner coordinates.

    The grid size is read from ``position.shape[2:4]``. The decoded
    distances from ``dfl_numpy`` are subtracted from / added to the cell
    centres and the result is multiplied by the stride derived from
    ``IMG_SIZE``.

    Returns:
        Array with 4 channels on axis 1: the two "minus" coordinates followed
        by the two "plus" coordinates.
    """
    grid_h, grid_w = position.shape[2:4]

    # Cell indices: channel 0 holds the column index, channel 1 the row index
    cols, rows = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
    grid = np.stack((cols, rows), axis=0)[np.newaxis]  # (1, 2, grid_h, grid_w)

    stride = np.array([IMG_SIZE[1] // grid_h, IMG_SIZE[0] // grid_w]).reshape(1, 2, 1, 1)

    dist = dfl_numpy(position)
    centre = grid + 0.5
    xy_min = (centre - dist[:, 0:2, :, :]) * stride
    xy_max = (centre + dist[:, 2:4, :, :]) * stride
    return np.concatenate((xy_min, xy_max), axis=1)
|
||||
|
||||
def filter_boxes(boxes, box_confidences, box_class_probs):
    """Report whether any candidate reaches ``OBJ_THRESH``.

    The score of a candidate is its confidence times its best class
    probability.

    Returns:
        A boolean: True when at least one score is >= ``OBJ_THRESH``
        (a bag is present), False otherwise.
    """
    # The reshaped boxes are not used further; the call is kept so malformed
    # input still fails here exactly as before.
    boxes = boxes.reshape(-1, 4)
    flat_conf = box_confidences.reshape(-1)
    probs = np.array(box_class_probs)

    best_class = np.argmax(probs, axis=-1)
    best_prob = probs[np.arange(len(best_class)), best_class]
    scores = flat_conf * best_prob

    hits = scores >= OBJ_THRESH
    return np.sum(hits) > 0
|
||||
|
||||
def post_process(outputs, scale, dx, dy):
    """Flatten the three output branches and hand them to ``filter_boxes``.

    ``outputs`` is read as three branches of three tensors each: index
    ``3*b`` is the box tensor, ``3*b + 1`` the class probabilities and
    ``3*b + 2`` the confidence. ``scale``, ``dx`` and ``dy`` are accepted
    but not used in this variant.

    Returns:
        Whatever ``filter_boxes`` returns.
    """
    def flatten(x):
        # (n, ch, h, w) -> (n*h*w, ch)
        nhwc = x.transpose(0, 2, 3, 1)
        return nhwc.reshape(-1, nhwc.shape[3])

    branches = range(3)
    boxes = np.concatenate([flatten(box_process(outputs[3 * b])) for b in branches])
    box_conf = np.concatenate([flatten(outputs[3 * b + 2]) for b in branches])
    class_probs = np.concatenate([flatten(outputs[3 * b + 1]) for b in branches])
    return filter_boxes(boxes, box_conf, class_probs)
|
||||
|
||||
# ================== RKNN initialisation ==================
rknn = RKNNLite()
if rknn.load_rknn(RKNN_MODEL) != 0:
    raise RuntimeError("RKNN 模型加载失败")
if rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO) != 0:
    raise RuntimeError("RKNN Runtime 初始化失败")
print("✅ RKNN 初始化完成")

# ================== Video stream ==================
cap = cv2.VideoCapture(RTSP_URL)
if not cap.isOpened():
    raise RuntimeError("RTSP 连接失败")
print("🎥 视频流已连接")

# ================== State machine ==================
STATE_IDLE = 0
STATE_CAPTURING = 1

state = STATE_IDLE
session_id = 0
session_dir = None
save_idx = 0
miss_count = 0
last_gray = None
frame_count = 0

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Nothing to process; wait briefly and try reading again
            time.sleep(0.5)
            continue

        frame_count += 1

        if SHOW_WINDOW:
            cv2.imshow("Camera", frame)
            if cv2.waitKey(1) == ord('q'):
                break

        # ---------- drop mostly-gray frames ----------
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        if is_large_gray(Image.fromarray(rgb)):
            continue

        # ---------- SSIM de-duplication (only while idle) ----------
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        if state == STATE_IDLE and last_gray is not None:
            if ssim(gray, last_gray) > SSIM_THRESHOLD:
                continue
        last_gray = gray.copy()

        # ---------- RKNN inference: is a bag present? ----------
        img_resized, scale, dx, dy = letterbox_resize(frame, IMG_SIZE)
        outputs = rknn.inference(inputs=[np.expand_dims(img_resized, 0)])
        has_bag = post_process(outputs, scale, dx, dy)

        # ---------- state machine ----------
        if state == STATE_CAPTURING:
            miss_count = 0 if has_bag else miss_count + 1

            if save_idx % SAVE_EVERY_N_FRAMES == 0:
                ts = time.strftime("%Y%m%d_%H%M%S")
                ms = int((time.time() % 1) * 1000)
                fname = f"{save_idx:06d}_{ts}_{ms:03d}.png"
                cv2.imwrite(os.path.join(session_dir, fname), frame)  # original frame
            save_idx += 1

            # Enough consecutive misses: close the session and go idle
            if miss_count >= END_MISS_FRAMES:
                print(f"🛑 退出采集,本次保存 {save_idx} 帧")
                state = STATE_IDLE
                miss_count = 0
                session_dir = None

        elif state == STATE_IDLE and has_bag:
            # First detection: open a new, timestamped session directory
            session_id += 1
            ts = time.strftime("%Y%m%d_%H%M%S")
            session_dir = os.path.join(OUTPUT_DIR, f"session_{session_id:04d}_{ts}")
            os.makedirs(session_dir, exist_ok=True)
            print(f"\n🚀 进入采集")
            state = STATE_CAPTURING
            miss_count = 0
            save_idx = 0

except KeyboardInterrupt:
    print("\n🛑 用户退出")

finally:
    cap.release()
    cv2.destroyAllWindows()
    rknn.release()
    print("程序结束")
|
||||
|
||||
181
detect_image/detect_bag.py
Normal file
181
detect_image/detect_bag.py
Normal file
@ -0,0 +1,181 @@
|
||||
import os
|
||||
import cv2
|
||||
import numpy as np
|
||||
from rknnlite.api import RKNNLite
|
||||
|
||||
# ====================== Configuration ======================
MODEL_PATH = "bag3588.rknn"
IMG_PATH = "2.jpg"
IMG_SIZE = (640, 640)
OBJ_THRESH = 0.001            # boxes scoring below this are dropped
NMS_THRESH = 0.45             # overlap above this suppresses the weaker box
CLASS_NAME = ["bag"]
OUTPUT_DIR = "./result"

# Make sure the output directory exists
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
# ====================== Shared RKNN runtime ======================
_global_rknn = None


def init_rknn(model_path):
    """Return the module-wide RKNNLite runtime, creating it on first use.

    The runtime is built once and cached in ``_global_rknn``; later calls
    return the cached object and do not look at ``model_path`` again.

    Args:
        model_path: path of the ``.rknn`` model to load on the first call.

    Raises:
        RuntimeError: loading the model or initialising the runtime returned
            a non-zero code. Nothing is cached in that case, so a later call
            can retry.
    """
    global _global_rknn
    if _global_rknn is None:
        rknn = RKNNLite(verbose=False)
        # Check the return codes so a broken runtime is never cached
        if rknn.load_rknn(model_path) != 0:
            raise RuntimeError(f"RKNN 模型加载失败: {model_path}")
        if rknn.init_runtime() != 0:
            raise RuntimeError("RKNN Runtime 初始化失败")
        _global_rknn = rknn
    return _global_rknn
|
||||
|
||||
# ====================== 工具函数 ======================
|
||||
def letterbox_resize(image, size, bg_color=114):
    """Scale ``image`` to fit inside ``size`` and pad the remainder.

    Args:
        image: array whose first two dimensions are (height, width).
        size: ``(width, height)`` of the output canvas.
        bg_color: value used to fill the padded area.

    Returns:
        ``(canvas, scale, dx, dy)``: the padded uint8 image, the resize
        factor that was applied, and the x/y offset of the resized image
        inside the canvas.
    """
    out_w, out_h = size
    src_h, src_w = image.shape[:2]

    # One factor for both axes keeps the aspect ratio
    scale = min(out_w / src_w, out_h / src_h)
    fit_w = int(src_w * scale)
    fit_h = int(src_h * scale)
    shrunk = cv2.resize(image, (fit_w, fit_h))

    # Offsets that centre the resized image on the canvas
    dx = (out_w - fit_w) // 2
    dy = (out_h - fit_h) // 2

    canvas = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)
    canvas[dy:dy + fit_h, dx:dx + fit_w] = shrunk
    return canvas, scale, dx, dy
|
||||
|
||||
def dfl_numpy(position):
    """Collapse per-side bin logits into one expected value per side.

    ``position`` of shape ``(n, c, h, w)`` is viewed as
    ``(n, 4, c // 4, h, w)``. A softmax is taken over the ``c // 4`` bins of
    each of the 4 groups and the result is the softmax-weighted mean of the
    bin indices.

    Returns:
        Array of shape ``(n, 4, h, w)``.
    """
    n, c, h, w = position.shape
    p_num = 4
    mc = c // p_num
    y = position.reshape(n, p_num, mc, h, w)

    # Shift by the per-distribution maximum before exponentiating. The softmax
    # value is mathematically unchanged, but exp() can no longer overflow to
    # inf (which previously turned the result into NaN for large logits).
    y = y - np.max(y, axis=2, keepdims=True)
    weights = np.exp(y)
    y = weights / np.sum(weights, axis=2, keepdims=True)

    acc = np.arange(mc).reshape(1, 1, mc, 1, 1)
    return np.sum(y * acc, axis=2)
|
||||
|
||||
def box_process(position):
    """Convert one branch's raw box tensor into corner coordinates.

    The grid size is read from ``position.shape[2:4]``. The decoded
    distances from ``dfl_numpy`` are subtracted from / added to the cell
    centres and the result is multiplied by the stride derived from
    ``IMG_SIZE``.

    Returns:
        Array with 4 channels on axis 1: the two "minus" coordinates followed
        by the two "plus" coordinates.
    """
    grid_h, grid_w = position.shape[2:4]

    # Cell indices: channel 0 holds the column index, channel 1 the row index
    cols, rows = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
    grid = np.stack((cols, rows), axis=0)[np.newaxis]  # (1, 2, grid_h, grid_w)

    stride = np.array([IMG_SIZE[1] // grid_h, IMG_SIZE[0] // grid_w]).reshape(1, 2, 1, 1)

    dist = dfl_numpy(position)
    centre = grid + 0.5
    xy_min = (centre - dist[:, 0:2, :, :]) * stride
    xy_max = (centre + dist[:, 2:4, :, :]) * stride
    return np.concatenate((xy_min, xy_max), axis=1)
|
||||
|
||||
def filter_boxes(boxes, box_confidences, box_class_probs):
    """Threshold candidate boxes by score, then apply greedy NMS.

    The score of a candidate is its confidence multiplied by its best class
    probability. Candidates scoring below ``OBJ_THRESH`` are dropped; the
    survivors go through non-maximum suppression with ``NMS_THRESH`` (applied
    across all classes together).

    Args:
        boxes: Values reshaped to ``(-1, 4)`` as ``x1, y1, x2, y2``.
        box_confidences: Per-candidate confidence, flattened to 1-D.
        box_class_probs: Per-candidate class probabilities, one row each.

    Returns:
        ``(boxes, classes, scores, conf_keep)`` for the kept candidates, or
        ``(None, None, None, None)`` when nothing reaches the threshold.
    """
    boxes = np.array(boxes).reshape(-1, 4)
    box_confidences = np.array(box_confidences).reshape(-1)
    box_class_probs = np.array(box_class_probs)

    best_cls = box_class_probs.argmax(axis=-1)
    best_prob = box_class_probs[np.arange(len(best_cls)), best_cls]
    combined = box_confidences * best_prob

    passed = combined >= OBJ_THRESH
    if not passed.any():
        return None, None, None, None

    boxes, classes = boxes[passed], best_cls[passed]
    scores, conf_keep = combined[passed], box_confidences[passed]

    left, top, right, bottom = boxes.T
    areas = (right - left + 1) * (bottom - top + 1)

    # Greedy NMS: take the best remaining box, discard everything that
    # overlaps it by more than NMS_THRESH, repeat.
    pending = scores.argsort()[::-1]
    keep = []
    while pending.size > 0:
        best, rest = pending[0], pending[1:]
        keep.append(best)

        inter_w = np.maximum(
            0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 1)
        inter_h = np.maximum(
            0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 1)
        inter = inter_w * inter_h
        iou = inter / (areas[best] + areas[rest] - inter)

        pending = rest[iou <= NMS_THRESH]

    return boxes[keep], classes[keep], scores[keep], conf_keep[keep]
|
||||
|
||||
def post_process(outputs, scale, dx, dy):
    """Decode model outputs into boxes in original-image coordinates.

    ``outputs`` is read as three heads of three tensors each: index ``3*i``
    is the box tensor, ``3*i + 1`` the class probabilities and ``3*i + 2``
    the confidence.

    Args:
        outputs: Sequence of model output arrays in ``(n, c, h, w)`` layout.
        scale: Resize factor returned by ``letterbox_resize``.
        dx: Horizontal padding returned by ``letterbox_resize``.
        dy: Vertical padding returned by ``letterbox_resize``.

    Returns:
        ``(boxes, classes, scores, conf_keep)`` or four ``None`` values when
        ``filter_boxes`` keeps nothing. Boxes have the padding removed, are
        divided by ``scale`` and clipped at 0. Note the returned scores are
        ``1 - score`` and ``conf_keep`` is multiplied by 255.
    """
    def to_rows(tensor):
        # (n, c, h, w) -> (n*h*w, c)
        channels = tensor.shape[1]
        return tensor.transpose(0, 2, 3, 1).reshape(-1, channels)

    box_rows, conf_rows, prob_rows = [], [], []
    for head in range(3):
        base = head * 3
        box_rows.append(to_rows(box_process(outputs[base])))
        conf_rows.append(to_rows(outputs[base + 2]))
        prob_rows.append(to_rows(outputs[base + 1]))

    boxes, classes, scores, conf_keep = filter_boxes(
        np.concatenate(box_rows),
        np.concatenate(conf_rows),
        np.concatenate(prob_rows),
    )
    if boxes is None:
        return None, None, None, None

    # Undo the letterbox: remove padding, then undo the resize.
    boxes[:, 0::2] -= dx
    boxes[:, 1::2] -= dy
    boxes /= scale
    boxes = boxes.clip(min=0)

    # NOTE(review): scores are inverted and confidences rescaled here;
    # presumably matched to this model's output convention - confirm.
    return boxes, classes, 1 - scores, conf_keep * 255
|
||||
|
||||
# ====================== detect_bag ======================
|
||||
def detect_bag(img, return_conf=True, return_vis=False):
    """Run the bag detector on one image.

    Args:
        img: Image array passed to ``letterbox_resize`` and, for the
            visualisation, copied and drawn on with OpenCV.
        return_conf: When true the result is ``(conf_val, min_x)``; otherwise
            it is ``(min_x, vis_img)``.
        return_vis: When true an annotated copy is drawn and written to
            ``OUTPUT_DIR/vis_result.jpg``. It is only part of the return
            value when ``return_conf`` is false.

    Returns:
        A 2-tuple in every case. ``conf_val`` is the maximum of the scores
        returned by ``post_process`` and ``min_x`` is the smallest left edge
        among the detected boxes. ``(None, None)`` when nothing is detected.
    """
    rknn = init_rknn(MODEL_PATH)

    img_resized, scale, dx, dy = letterbox_resize(img, IMG_SIZE)
    input_data = np.expand_dims(img_resized, 0)
    outputs = rknn.inference(inputs=[input_data])
    boxes, classes, scores, _ = post_process(outputs, scale, dx, dy)

    if boxes is None or len(boxes) == 0:
        # Same arity as the success path for both flag settings; the old
        # 1-tuple broke `min_x, vis = detect_bag(img, return_conf=False)`.
        return None, None

    min_x = float(boxes[:, 0].min())
    conf_val = float(scores.max()) if return_conf else None
    vis_img = None

    if return_vis:
        vis_img = img.copy()
        for box, cls_id, score in zip(boxes, classes, scores):
            # Plain Python ints keep OpenCV's point parsing happy.
            x1, y1, x2, y2 = (int(v) for v in box)
            cv2.rectangle(vis_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(vis_img,
                        f"{CLASS_NAME[cls_id]}:{score:.1f}",
                        (x1, max(y1 - 5, 0)),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.6,
                        (0, 255, 0),
                        2)
        save_path = os.path.join(OUTPUT_DIR, "vis_result.jpg")
        cv2.imwrite(save_path, vis_img)

    if return_conf:
        return conf_val, min_x
    return min_x, vis_img
|
||||
|
||||
# ====================== 测试 ======================
|
||||
if __name__ == "__main__":
    # Manual smoke test: run the detector once on IMG_PATH and report.
    img = cv2.imread(IMG_PATH)
    if img is None:
        raise FileNotFoundError(f"图片无法读取: {IMG_PATH}")

    # The flags select which values come back: confidence and/or visualisation.
    conf, min_x = detect_bag(img, return_vis=True, return_conf=True)

    message = (
        "❌ 未检测到 bag"
        if conf is None
        else f"✅ 最大置信度: {conf:.4f}, 最左 x: {min_x:.1f}"
    )
    print(message)
|
||||
|
||||
202
detect_image/image_01_3588.py
Normal file
202
detect_image/image_01_3588.py
Normal file
@ -0,0 +1,202 @@
|
||||
import cv2
|
||||
import time
|
||||
import os
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from skimage.metrics import structural_similarity as ssim
|
||||
import shutil
|
||||
from rknnlite.api import RKNNLite
|
||||
|
||||
# ================== Configuration ==================
# Camera stream address. Set the RTSP_URL environment variable to override
# the built-in default so credentials do not have to live in the source.
RTSP_URL = os.environ.get(
    "RTSP_URL",
    "rtsp://admin:XJ123456@192.168.250.60:554/streaming/channels/101",
)
SAVE_INTERVAL = 15           # only every SAVE_INTERVAL-th frame is processed
SSIM_THRESHOLD = 0.9         # frames more similar than this to the last kept one are skipped
OUTPUT_DIR = "camera_test"   # directory the saved frames are written to
RKNN_MODEL = "bag3588.rknn"  # model file loaded by RKNNLite
SHOW_WINDOW = False          # show a live preview window while running

# Gray-frame filter: a pixel counts when all three channels lie in
# [GRAY_LOWER, GRAY_UPPER]; a frame is skipped when the share of such pixels
# exceeds GRAY_RATIO_THRESHOLD.
GRAY_LOWER = 70
GRAY_UPPER = 230
GRAY_RATIO_THRESHOLD = 0.7

IMG_SIZE = (640, 640)        # model input size as (width, height)
OBJ_THRESH = 0.001           # minimum combined score for a detection
NMS_THRESH = 0.45            # not referenced by the code in this script
CLASS_NAME = ["bag"]         # not referenced by the code in this script

os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
# ================== 灰度判断 ==================
|
||||
def is_large_gray(image):
    """Tell whether most pixels of ``image`` fall in the configured gray band.

    A pixel counts when each of its three channels lies within
    ``[GRAY_LOWER, GRAY_UPPER]``.

    Args:
        image: Anything ``np.array`` can convert (the caller passes a PIL
            image).

    Returns:
        True when the input is not a 3-channel image, or when the share of
        in-band pixels exceeds ``GRAY_RATIO_THRESHOLD``.
    """
    pixels = np.array(image)
    if not (pixels.ndim == 3 and pixels.shape[2] == 3):
        return True

    in_band = (pixels >= GRAY_LOWER) & (pixels <= GRAY_UPPER)
    gray_mask = in_band.all(axis=2)

    height, width = pixels.shape[:2]
    return gray_mask.sum() / (height * width) > GRAY_RATIO_THRESHOLD
|
||||
|
||||
# ================== RKNN 工具函数 ==================
|
||||
def letterbox_resize(image, size, bg_color=114):
    """Scale ``image`` to fit ``size`` without distortion and pad the rest.

    Args:
        image: Array whose first two dimensions are (height, width).
        size: Target ``(width, height)``.
        bg_color: Value used to fill the padding.

    Returns:
        ``(canvas, scale, dx, dy)`` where ``canvas`` is the uint8 3-channel
        padded image, ``scale`` the resize factor, and ``dx``/``dy`` the
        left/top offsets at which the resized image was pasted.
    """
    out_w, out_h = size
    in_h, in_w = image.shape[:2]

    scale = min(out_w / in_w, out_h / in_h)
    scaled_w = int(in_w * scale)
    scaled_h = int(in_h * scale)

    # Centre the resized image on the canvas.
    dx = (out_w - scaled_w) // 2
    dy = (out_h - scaled_h) // 2

    canvas = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)
    canvas[dy:dy + scaled_h, dx:dx + scaled_w] = cv2.resize(image, (scaled_w, scaled_h))
    return canvas, scale, dx, dy
|
||||
|
||||
def dfl_numpy(position):
    """Reduce per-side bin logits to a single expected distance per side.

    The ``c`` channels are viewed as 4 groups of ``c // 4`` bins. Each group
    is softmax-normalised and replaced by the weighted mean of its bin
    indices (presumably DFL decoding; confirm against the model export).

    Args:
        position: Array of shape ``(n, c, h, w)``.

    Returns:
        Array of shape ``(n, 4, h, w)``.
    """
    n, c, h, w = position.shape
    p_num = 4
    mc = c // p_num

    grouped = position.reshape(n, p_num, mc, h, w)
    # Shift by the group maximum before exponentiating; the softmax value is
    # unchanged but np.exp can no longer overflow and yield NaN.
    shifted = grouped - np.max(grouped, axis=2, keepdims=True)
    expd = np.exp(shifted)
    probs = expd / np.sum(expd, axis=2, keepdims=True)

    acc = np.arange(mc).reshape(1, 1, mc, 1, 1)
    return np.sum(probs * acc, axis=2)
|
||||
|
||||
def box_process(position):
    """Convert one head's raw box tensor to corner coordinates.

    Args:
        position: Array of shape ``(n, c, grid_h, grid_w)``; decoded through
            ``dfl_numpy`` into four distances per grid cell.

    Returns:
        Array of shape ``(n, 4, grid_h, grid_w)``: the first two channels are
        ``(cell + 0.5 - d[0:2]) * stride`` and the last two are
        ``(cell + 0.5 + d[2:4]) * stride``.
    """
    n_rows, n_cols = position.shape[2:4]
    col_idx, row_idx = np.meshgrid(np.arange(n_cols), np.arange(n_rows))
    # Shape (1, 2, n_rows, n_cols): column index first, row index second.
    cells = np.stack((col_idx, row_idx), axis=0)[np.newaxis]

    stride = np.array([IMG_SIZE[1] // n_rows, IMG_SIZE[0] // n_cols]).reshape(1, 2, 1, 1)

    distances = dfl_numpy(position)
    anchor = cells + 0.5
    near = (anchor - distances[:, 0:2, :, :]) * stride
    far = (anchor + distances[:, 2:4, :, :]) * stride
    return np.concatenate((near, far), axis=1)
|
||||
|
||||
def filter_boxes(boxes, box_confidences, box_class_probs):
    """Report whether any candidate reaches the detection threshold.

    A candidate's score is its confidence times its best class probability.

    Args:
        boxes: Candidate boxes; only reshaped to ``(-1, 4)`` and otherwise
            not used.
        box_confidences: Per-candidate confidence array.
        box_class_probs: Per-candidate class probabilities, one row each.

    Returns:
        ``True`` when at least one score is ``>= OBJ_THRESH``, else ``None``.
    """
    boxes = boxes.reshape(-1, 4)  # result not needed beyond this point
    objectness = box_confidences.reshape(-1)
    probs = np.array(box_class_probs)

    best_cls = probs.argmax(axis=-1)
    best_prob = probs[np.arange(len(best_cls)), best_cls]
    combined = objectness * best_prob

    # Only presence matters to the caller.
    return True if (combined >= OBJ_THRESH).any() else None
|
||||
|
||||
def post_process(outputs, scale, dx, dy):
    """Flatten the three detection heads and hand them to ``filter_boxes``.

    ``outputs`` is read as three heads of three tensors each: index ``3*i``
    is the box tensor, ``3*i + 1`` the class probabilities and ``3*i + 2``
    the confidence.

    Args:
        outputs: Sequence of model output arrays in ``(n, c, h, w)`` layout.
        scale: Unused here; kept for signature compatibility.
        dx: Unused here; kept for signature compatibility.
        dy: Unused here; kept for signature compatibility.

    Returns:
        Whatever ``filter_boxes`` returns for the flattened candidates.
    """
    def to_rows(tensor):
        # (n, c, h, w) -> (n*h*w, c)
        moved = tensor.transpose(0, 2, 3, 1)
        return moved.reshape(-1, moved.shape[3])

    box_rows, conf_rows, prob_rows = [], [], []
    for head in range(3):
        base = head * 3
        box_rows.append(to_rows(box_process(outputs[base])))
        conf_rows.append(to_rows(outputs[base + 2]))
        prob_rows.append(to_rows(outputs[base + 1]))

    return filter_boxes(
        np.concatenate(box_rows),
        np.concatenate(conf_rows),
        np.concatenate(prob_rows),
    )
|
||||
|
||||
# ================== RKNN initialisation ==================
rknn = RKNNLite()
if rknn.load_rknn(RKNN_MODEL) != 0:
    raise RuntimeError("❌ RKNN 模型加载失败")
if rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO) != 0:
    raise RuntimeError("❌ RKNN Runtime 初始化失败")
print("✅ RKNN 初始化完成")

# ================== Video stream processing ==================
max_retry_seconds = 10        # give up connecting after this long
retry_interval_seconds = 1    # pause between connection attempts

last_gray = None              # grayscale copy of the last frame that passed the SSIM check
frame_count = 0

# The outer try/finally guarantees the NPU runtime is released on every exit
# path, including the SystemExit raised for connection failure or low disk.
try:
    while True:
        cap = cv2.VideoCapture(RTSP_URL)
        start_time = time.time()

        while not cap.isOpened():
            cap.release()  # drop the failed handle before retrying or exiting
            if time.time() - start_time >= max_retry_seconds:
                print("❌ 无法连接 RTSP")
                # `exit()` is a helper injected by the site module; raise
                # SystemExit directly instead.
                raise SystemExit(1)
            time.sleep(retry_interval_seconds)
            cap = cv2.VideoCapture(RTSP_URL)

        print("✅ 开始读取视频流")

        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    # Leave the read loop; the outer loop reconnects.
                    print("❌ 读取失败")
                    break

                frame_count += 1

                if SHOW_WINDOW:
                    cv2.imshow("Camera", frame)
                    if cv2.waitKey(1) == ord('q'):
                        raise KeyboardInterrupt

                if frame_count % SAVE_INTERVAL != 0:
                    continue

                print(f"处理帧 {frame_count}")

                # STEP 1: skip frames dominated by the gray band
                pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                if is_large_gray(pil_image):
                    print("跳过:大面积灰色")
                    continue

                # STEP 2: SSIM de-duplication against the last kept frame
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                if last_gray is not None:
                    sim = ssim(gray, last_gray)
                    if sim > SSIM_THRESHOLD:
                        print(f"跳过:SSIM={sim:.3f}")
                        continue
                last_gray = gray.copy()

                # STEP 3: RKNN inference, only to decide whether a bag is present
                img_resized, scale, dx, dy = letterbox_resize(frame, IMG_SIZE)
                input_data = np.expand_dims(img_resized, 0)
                outputs = rknn.inference(inputs=[input_data])
                has_bag = post_process(outputs, scale, dx, dy)
                if not has_bag:
                    print("跳过:未检测到 bag")
                    continue

                # STEP 4: stop when less than 5 GiB is free on the output volume
                _, _, free = shutil.disk_usage(OUTPUT_DIR)
                if free < 5 * 1024 ** 3:
                    print("❌ 磁盘空间不足")
                    raise SystemExit(1)

                # STEP 5: save the original frame; one clock read so the
                # second and millisecond parts of the name always agree
                now = time.time()
                ts = time.strftime("%Y%m%d_%H%M%S", time.localtime(now))
                ms = int((now % 1) * 1000)
                filename = f"bag_{ts}_{ms:03d}.png"
                path = os.path.join(OUTPUT_DIR, filename)
                cv2.imwrite(path, frame)
                print(f"✅ 已保存: {path}")

        except KeyboardInterrupt:
            print("\n🛑 用户中断")
            break

        finally:
            cap.release()
            cv2.destroyAllWindows()
            print(f"视频流关闭,共处理 {frame_count} 帧")
finally:
    rknn.release()

print("程序结束")
|
||||
|
||||
205
detect_image/image_02_3588.py
Normal file
205
detect_image/image_02_3588.py
Normal file
@ -0,0 +1,205 @@
|
||||
import cv2
|
||||
import time
|
||||
import os
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from skimage.metrics import structural_similarity as ssim
|
||||
from rknnlite.api import RKNNLite
|
||||
|
||||
# ================== Configuration ==================
# Camera stream address. Set the RTSP_URL environment variable to override
# the built-in default so credentials do not have to live in the source.
RTSP_URL = os.environ.get(
    "RTSP_URL",
    "rtsp://admin:XJ123456@192.168.250.60:554/streaming/channels/101",
)
RKNN_MODEL = "bag3588.rknn"          # model file loaded by RKNNLite
OUTPUT_DIR = "camera_event_capture"  # root directory for per-session folders

CONF_THRESHOLD = 0.5     # not referenced by the code in this script
SSIM_THRESHOLD = 0.9     # while idle, frames more similar than this are skipped

END_MISS_FRAMES = 30     # consecutive frames without a detection that end a capture
SAVE_EVERY_N_FRAMES = 1  # while capturing, save one frame out of every N
SHOW_WINDOW = False      # show a live preview window while running

# Gray-frame filter: a pixel counts when all three channels lie in
# [GRAY_LOWER, GRAY_UPPER]; a frame is skipped when the share of such pixels
# exceeds GRAY_RATIO_THRESHOLD.
GRAY_LOWER = 70
GRAY_UPPER = 230
GRAY_RATIO_THRESHOLD = 0.7

IMG_SIZE = (640, 640)    # model input size as (width, height)
OBJ_THRESH = 0.001       # minimum combined score for a detection
NMS_THRESH = 0.45        # not referenced by the code in this script
CLASS_NAME = ["bag"]     # not referenced by the code in this script

os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
# ================== 灰度判断 ==================
|
||||
def is_large_gray(image):
    """Check whether the configured gray band dominates ``image``.

    A pixel is counted when every one of its three channels is within
    ``[GRAY_LOWER, GRAY_UPPER]``.

    Args:
        image: Anything ``np.array`` can convert (the caller passes a PIL
            image).

    Returns:
        True for input that is not a 3-channel image, or when the fraction of
        counted pixels is greater than ``GRAY_RATIO_THRESHOLD``.
    """
    arr = np.array(image)
    if not (arr.ndim == 3 and arr.shape[2] == 3):
        return True

    within = (arr >= GRAY_LOWER) & (arr <= GRAY_UPPER)
    gray_mask = within.all(axis=2)

    rows, cols = arr.shape[:2]
    return gray_mask.sum() / (rows * cols) > GRAY_RATIO_THRESHOLD
|
||||
|
||||
# ================== RKNN 推理工具 ==================
|
||||
def letterbox_resize(image, size, bg_color=114):
    """Resize ``image`` to fit ``size`` with its aspect ratio kept, then pad.

    Args:
        image: Array whose first two dimensions are (height, width).
        size: Target ``(width, height)``.
        bg_color: Fill value of the padded border.

    Returns:
        ``(canvas, scale, dx, dy)``: the uint8 3-channel padded canvas, the
        resize factor, and the left/top offsets of the pasted image.
    """
    canvas_w, canvas_h = size
    img_h, img_w = image.shape[:2]

    scale = min(canvas_w / img_w, canvas_h / img_h)
    inner_w = int(img_w * scale)
    inner_h = int(img_h * scale)

    # Equal padding on both sides (the extra pixel, if any, goes right/bottom).
    dx = (canvas_w - inner_w) // 2
    dy = (canvas_h - inner_h) // 2

    canvas = np.full((canvas_h, canvas_w, 3), bg_color, dtype=np.uint8)
    canvas[dy:dy + inner_h, dx:dx + inner_w] = cv2.resize(image, (inner_w, inner_h))
    return canvas, scale, dx, dy
|
||||
|
||||
def dfl_numpy(position):
    """Turn per-side bin logits into one expected distance per side.

    Channels are split into 4 groups of ``c // 4`` bins; each group goes
    through a softmax and is replaced by the weighted mean of its bin indices
    (presumably DFL decoding; confirm against the model export).

    Args:
        position: Array of shape ``(n, c, h, w)``.

    Returns:
        Array of shape ``(n, 4, h, w)``.
    """
    n, c, h, w = position.shape
    p_num = 4
    mc = c // p_num

    y = position.reshape(n, p_num, mc, h, w)
    # Numerically stable softmax: subtracting the per-group maximum leaves
    # the result unchanged but prevents np.exp overflow (inf / inf -> NaN).
    y = np.exp(y - np.max(y, axis=2, keepdims=True))
    y = y / np.sum(y, axis=2, keepdims=True)

    acc = np.arange(mc).reshape(1, 1, mc, 1, 1)
    return np.sum(y * acc, axis=2)
|
||||
|
||||
def box_process(position):
    """Decode one head's box tensor into corner coordinates.

    Args:
        position: Array of shape ``(n, c, grid_h, grid_w)``; ``dfl_numpy``
            reduces it to four distances per grid cell.

    Returns:
        Array of shape ``(n, 4, grid_h, grid_w)`` made of
        ``(cell + 0.5 - d[0:2]) * stride`` followed by
        ``(cell + 0.5 + d[2:4]) * stride``.
    """
    gh, gw = position.shape[2:4]
    gx, gy = np.meshgrid(np.arange(gw), np.arange(gh))
    # (1, 2, gh, gw) index grid: channel 0 = column, channel 1 = row.
    cell = np.stack((gx, gy), axis=0)[np.newaxis]

    stride = np.array([IMG_SIZE[1] // gh, IMG_SIZE[0] // gw]).reshape(1, 2, 1, 1)

    offsets = dfl_numpy(position)
    mid = cell + 0.5
    lo = (mid - offsets[:, 0:2, :, :]) * stride
    hi = (mid + offsets[:, 2:4, :, :]) * stride
    return np.concatenate((lo, hi), axis=1)
|
||||
|
||||
def filter_boxes(boxes, box_confidences, box_class_probs):
    """Report whether any candidate reaches the detection threshold.

    A candidate's score is its confidence times its best class probability.

    Args:
        boxes: Candidate boxes; only reshaped to ``(-1, 4)`` and otherwise
            not used.
        box_confidences: Per-candidate confidence array.
        box_class_probs: Per-candidate class probabilities, one row each.

    Returns:
        A boolean: true when at least one score is ``>= OBJ_THRESH``.
    """
    boxes = boxes.reshape(-1, 4)  # result not needed beyond this point
    objectness = box_confidences.reshape(-1)
    probs = np.array(box_class_probs)

    top_cls = probs.argmax(axis=-1)
    top_prob = probs[np.arange(len(top_cls)), top_cls]
    combined = objectness * top_prob

    # True: bag present, False: no bag.
    return np.any(combined >= OBJ_THRESH)
|
||||
|
||||
def post_process(outputs, scale, dx, dy):
    """Flatten the three detection heads and hand them to ``filter_boxes``.

    ``outputs`` is read as three heads of three tensors each: index ``3*i``
    is the box tensor, ``3*i + 1`` the class probabilities and ``3*i + 2``
    the confidence.

    Args:
        outputs: Sequence of model output arrays in ``(n, c, h, w)`` layout.
        scale: Unused here; kept for signature compatibility.
        dx: Unused here; kept for signature compatibility.
        dy: Unused here; kept for signature compatibility.

    Returns:
        Whatever ``filter_boxes`` returns for the flattened candidates.
    """
    def as_rows(tensor):
        # (n, c, h, w) -> (n*h*w, c)
        channels_last = tensor.transpose(0, 2, 3, 1)
        return channels_last.reshape(-1, channels_last.shape[3])

    box_parts, conf_parts, cls_parts = [], [], []
    for branch in range(3):
        first = branch * 3
        box_parts.append(as_rows(box_process(outputs[first])))
        conf_parts.append(as_rows(outputs[first + 2]))
        cls_parts.append(as_rows(outputs[first + 1]))

    return filter_boxes(
        np.concatenate(box_parts),
        np.concatenate(conf_parts),
        np.concatenate(cls_parts),
    )
|
||||
|
||||
# ================== RKNN initialisation ==================
rknn = RKNNLite()
if rknn.load_rknn(RKNN_MODEL) != 0:
    raise RuntimeError("RKNN 模型加载失败")
if rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO) != 0:
    raise RuntimeError("RKNN Runtime 初始化失败")
print("✅ RKNN 初始化完成")

# ================== Video stream ==================
cap = cv2.VideoCapture(RTSP_URL)
if not cap.isOpened():
    # This failure happens before the try/finally below, so release the
    # capture handle and the NPU runtime here.
    cap.release()
    rknn.release()
    raise RuntimeError("RTSP 连接失败")
print("🎥 视频流已连接")

# ================== State machine ==================
STATE_IDLE = 0        # waiting for a bag to appear
STATE_CAPTURING = 1   # saving frames of the current session

state = STATE_IDLE
miss_count = 0        # consecutive capture frames without a detection
save_idx = 0          # index of the current frame within the session
saved_count = 0       # frames actually written in the session
session_dir = None
session_id = 0
last_gray = None
frame_count = 0

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # NOTE(review): a permanently dropped stream keeps this loop
            # sleeping forever; there is no reconnect here.
            time.sleep(0.5)
            continue

        frame_count += 1

        if SHOW_WINDOW:
            cv2.imshow("Camera", frame)
            if cv2.waitKey(1) == ord('q'):
                break

        # ---------- Gray-frame filter ----------
        pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if is_large_gray(pil_image):
            continue

        # ---------- SSIM de-duplication (only while idle) ----------
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        if last_gray is not None and state == STATE_IDLE:
            sim = ssim(gray, last_gray)
            if sim > SSIM_THRESHOLD:
                continue
        last_gray = gray.copy()

        # ---------- RKNN inference: is there a bag? ----------
        img_resized, scale, dx, dy = letterbox_resize(frame, IMG_SIZE)
        input_data = np.expand_dims(img_resized, 0)
        outputs = rknn.inference(inputs=[input_data])
        has_bag = post_process(outputs, scale, dx, dy)

        # ---------- State machine ----------
        if state == STATE_IDLE:
            if has_bag:
                session_id += 1
                ts = time.strftime("%Y%m%d_%H%M%S")
                session_dir = os.path.join(OUTPUT_DIR, f"session_{session_id:04d}_{ts}")
                os.makedirs(session_dir, exist_ok=True)
                print("\n🚀 进入采集")
                state = STATE_CAPTURING
                miss_count = 0
                save_idx = 0
                saved_count = 0

        elif state == STATE_CAPTURING:
            if has_bag:
                miss_count = 0
            else:
                miss_count += 1

            if save_idx % SAVE_EVERY_N_FRAMES == 0:
                # One clock read so the second and millisecond parts agree.
                now = time.time()
                ts = time.strftime("%Y%m%d_%H%M%S", time.localtime(now))
                ms = int((now % 1) * 1000)
                fname = f"{save_idx:06d}_{ts}_{ms:03d}.png"
                cv2.imwrite(os.path.join(session_dir, fname), frame)  # save the original frame
                saved_count += 1
            save_idx += 1

            if miss_count >= END_MISS_FRAMES:
                # Report frames written, not frames seen; the two differ
                # when SAVE_EVERY_N_FRAMES > 1.
                print(f"🛑 退出采集,本次保存 {saved_count} 帧")
                state = STATE_IDLE
                miss_count = 0
                session_dir = None

except KeyboardInterrupt:
    print("\n🛑 用户退出")

finally:
    cap.release()
    cv2.destroyAllWindows()
    rknn.release()
    print("程序结束")
|
||||
|
||||
Reference in New Issue
Block a user