139 lines
4.8 KiB
Python
139 lines
4.8 KiB
Python
|
|
import os
|
||
|
|
import cv2
|
||
|
|
import numpy as np
|
||
|
|
from rknnlite.api import RKNNLite
|
||
|
|
|
||
|
|
# ====================== Configuration ======================
# Input and output locations.
MODEL_PATH = "yolo11.rknn"  # path to the RKNN model file
IMG_PATH = "11.jpg"         # image to run detection on
OUTPUT_DIR = "./result"     # directory that receives the annotated image

# Model geometry and labels.
IMG_SIZE = (640, 640)       # model input size as (width, height)
CLASS_NAME = ["bag"]        # single-class model

# Post-processing thresholds.
OBJ_THRESH = 0.001          # minimum combined score for a box to be kept
NMS_THRESH = 0.45           # NMS threshold (not referenced by the code visible here)

# Create the output directory up front so the final imwrite cannot fail on a
# missing folder.
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||
|
|
|
||
|
|
# ====================== 工具函数 ======================
|
||
|
|
def letterbox_resize(image, size, bg_color=114):
    """Resize *image* to fit inside *size* keeping its aspect ratio, then pad.

    Args:
        image: Source image array; only ``image.shape[:2]`` (height, width)
            is inspected here.
        size: Target ``(width, height)`` of the padded canvas.
        bg_color: Fill value for the padding area.

    Returns:
        ``(canvas, scale, dx, dy)`` where ``canvas`` is a ``uint8`` array of
        shape ``(height, width, 3)``, ``scale`` is the resize factor applied
        to the source, and ``dx`` / ``dy`` are the left / top padding offsets
        of the resized image inside the canvas.
    """
    out_w, out_h = size
    src_h, src_w = image.shape[:2]

    # The smaller ratio guarantees the whole image fits inside the canvas.
    scale = min(out_w / src_w, out_h / src_h)
    fit_w = int(src_w * scale)
    fit_h = int(src_h * scale)

    # Centre the resized image; integer division puts any odd pixel on the
    # right / bottom side.
    dx = (out_w - fit_w) // 2
    dy = (out_h - fit_h) // 2

    shrunk = cv2.resize(image, (fit_w, fit_h))
    canvas = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)
    canvas[dy:dy + fit_h, dx:dx + fit_w] = shrunk
    return canvas, scale, dx, dy
|
||
|
|
|
||
|
|
def dfl_numpy(position):
    """Decode a Distribution Focal Loss (DFL) box output with pure NumPy.

    The channel axis is split into 4 groups of ``c // 4`` bins. Each group is
    turned into a probability distribution with a softmax, and the result is
    the expected bin index of that distribution.

    Args:
        position: Array of shape ``(n, c, h, w)``.

    Returns:
        Array of shape ``(n, 4, h, w)`` holding the expected bin index for
        each of the 4 groups at every grid cell.
    """
    n, c, h, w = position.shape
    p_num = 4
    mc = c // p_num
    logits = position.reshape(n, p_num, mc, h, w)

    # Subtract the per-distribution maximum before exponentiating. This leaves
    # the softmax value unchanged but keeps exp() from overflowing to inf
    # (and the division from producing NaN) on large logits.
    logits = logits - logits.max(axis=2, keepdims=True)
    weights = np.exp(logits)
    probs = weights / np.sum(weights, axis=2, keepdims=True)

    bins = np.arange(mc).reshape(1, 1, mc, 1, 1)
    return np.sum(probs * bins, axis=2)
|
||
|
|
|
||
|
|
def box_process(position):
    """Decode one head's raw box output into xyxy boxes in model-input pixels.

    Args:
        position: Array of shape ``(n, c, grid_h, grid_w)`` that is passed to
            ``dfl_numpy``. Of the 4 decoded distances, the first two are
            subtracted from the cell centre and the last two are added to it.

    Returns:
        Array of shape ``(n, 4, grid_h, grid_w)`` with channels
        ``(x1, y1, x2, y2)`` scaled by the head's stride.
    """
    grid_h, grid_w = position.shape[2:4]
    col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
    col = col.reshape(1, 1, grid_h, grid_w)
    row = row.reshape(1, 1, grid_h, grid_w)
    # Channel 0 is the column (x) index, channel 1 the row (y) index.
    grid = np.concatenate((col, row), axis=1)

    # IMG_SIZE is (width, height). The stride vector must follow the same
    # (x, y) channel order as the grid: x stride comes from width / grid_w,
    # y stride from height / grid_h. The previous order was (y, x), which is
    # only harmless when both strides happen to be equal.
    stride = np.array(
        [IMG_SIZE[0] // grid_w, IMG_SIZE[1] // grid_h]
    ).reshape(1, 2, 1, 1)

    distances = dfl_numpy(position)
    # Distances are measured from the cell centre (grid index + 0.5).
    top_left = grid + 0.5 - distances[:, 0:2, :, :]
    bottom_right = grid + 0.5 + distances[:, 2:4, :, :]
    return np.concatenate((top_left * stride, bottom_right * stride), axis=1)
|
||
|
|
|
||
|
|
def filter_boxes(boxes, box_confidences, box_class_probs, obj_thresh=None):
    """Score candidate boxes and drop those below the confidence threshold.

    A sigmoid is applied to ``box_confidences`` and a softmax (over the last
    axis) to ``box_class_probs``. A box's score is its best class probability
    multiplied by its confidence.

    Args:
        boxes: Array with one row per candidate box.
        box_confidences: Raw confidence values, one per candidate; flattened
            to 1-D before use.
        box_class_probs: Raw per-class values, one row per candidate.
        obj_thresh: Minimum score to keep a box. Defaults to the module-level
            ``OBJ_THRESH`` when ``None``.

    Returns:
        ``(boxes, classes, scores)`` restricted to the candidates whose score
        is at least ``obj_thresh``.
    """
    if obj_thresh is None:
        obj_thresh = OBJ_THRESH

    # Numerically stable sigmoid: only ever exponentiate a non-positive value
    # so exp() cannot overflow for large-magnitude inputs.
    raw_conf = box_confidences.reshape(-1)
    non_negative = raw_conf >= 0
    decay = np.exp(np.where(non_negative, -raw_conf, raw_conf))
    confidences = np.where(non_negative, 1 / (1 + decay), decay / (1 + decay))

    # Numerically stable softmax: shifting by the row maximum leaves the
    # result unchanged but avoids inf / inf = NaN, which previously made the
    # threshold comparison silently discard the box.
    shifted = box_class_probs - np.max(box_class_probs, axis=-1, keepdims=True)
    class_weights = np.exp(shifted)
    class_probs = class_weights / np.sum(class_weights, axis=-1, keepdims=True)

    classes = np.argmax(class_probs, axis=-1)
    combined = np.max(class_probs, axis=-1) * confidences

    keep = combined >= obj_thresh
    return boxes[keep], classes[keep], combined[keep]
|
||
|
|
|
||
|
|
def post_process(outputs, scale, dx, dy):
    """Turn raw model outputs into the single best box in source-image pixels.

    Args:
        outputs: Sequence of model output arrays. For each of the 3 heads
            ``k``, ``outputs[3*k]`` is decoded by ``box_process``,
            ``outputs[3*k + 1]`` is used as the per-class values and
            ``outputs[3*k + 2]`` as the confidence values.
        scale: Resize factor that was applied to the source image.
        dx: Left padding that was added to the resized image.
        dy: Top padding that was added to the resized image.

    Returns:
        ``(boxes, classes, scores)``, each holding exactly one entry (the
        highest-scoring box), or ``(None, None, None)`` when no candidate
        passes the threshold.
    """
    head_count = 3

    def to_rows(feature_map):
        # (n, c, h, w) -> (n * h * w, c): one row per grid cell.
        channels = feature_map.shape[1]
        return feature_map.transpose(0, 2, 3, 1).reshape(-1, channels)

    box_maps = []
    class_maps = []
    conf_maps = []
    for head in range(head_count):
        base = head * 3
        box_maps.append(box_process(outputs[base]))
        class_maps.append(outputs[base + 1])
        conf_maps.append(outputs[base + 2])

    box_rows = np.concatenate([to_rows(m) for m in box_maps])
    class_rows = np.concatenate([to_rows(m) for m in class_maps])
    conf_rows = np.concatenate([to_rows(m) for m in conf_maps])

    boxes, classes, scores = filter_boxes(box_rows, conf_rows, class_rows)
    if len(boxes) == 0:
        return None, None, None

    # Keep only the highest-scoring box, as a length-1 array.
    top = np.argmax(scores)
    boxes = boxes[[top]]
    classes = classes[[top]]
    scores = scores[[top]]

    # Undo the letterbox: remove the padding offsets, then the resize factor.
    boxes[:, 0::2] -= dx
    boxes[:, 1::2] -= dy
    boxes /= scale
    boxes = np.clip(boxes, 0, None)

    return boxes, classes, scores
|
||
|
|
|
||
|
|
def draw(image, boxes, scores, classes):
    """Draw each box and its ``"<class name> <score>"`` label onto *image*.

    The image is modified in place.

    Args:
        image: Image array to draw on.
        boxes: Iterable of ``(x1, y1, x2, y2)`` box coordinates.
        scores: Iterable of scores, parallel to ``boxes``.
        classes: Iterable of indices into ``CLASS_NAME``, parallel to
            ``boxes``.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.6
    thickness = 2

    for box, score, cl in zip(boxes, scores, classes):
        x1, y1, x2, y2 = (int(v) for v in box)
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)

        label = f"{CLASS_NAME[cl]} {score:.3f}"
        (_, text_h), _ = cv2.getTextSize(label, font, font_scale, thickness)

        # putText anchors the text at its bottom-left corner. When the box
        # touches the top of the image the usual "5 px above the box"
        # position would place the label outside the image, so fall back to
        # drawing it just inside the box instead.
        text_y = y1 - 5
        if text_y - text_h < 0:
            text_y = y1 + text_h + 5

        cv2.putText(image, label, (x1, text_y),
                    font, font_scale, (0, 0, 255), thickness)
|
||
|
|
|
||
|
|
# ====================== Main flow ======================
# Load the image, run it through the RKNN model, and save the annotated result.
img = cv2.imread(IMG_PATH)
if img is None:
    raise ValueError(f"Image {IMG_PATH} not found!")

# NOTE(review): the image is fed to the model in whatever channel order
# cv2.imread produced; confirm this matches what the model was converted with.
img_resized, scale, dx, dy = letterbox_resize(img, IMG_SIZE)
input_data = np.expand_dims(img_resized, 0)  # add a leading batch axis -> 4-D input

rknn = RKNNLite(verbose=False)
try:
    # load_rknn / init_runtime report failure through a non-zero return code
    # rather than an exception, so check them explicitly.
    if rknn.load_rknn(MODEL_PATH) != 0:
        raise RuntimeError(f"Failed to load RKNN model {MODEL_PATH}")
    if rknn.init_runtime() != 0:
        raise RuntimeError("Failed to initialise the RKNN runtime")
    outputs = rknn.inference([input_data])
finally:
    # Always release the runtime, even when loading or inference fails.
    rknn.release()

if outputs is None:
    raise RuntimeError("RKNN inference returned no outputs")

# Dump basic statistics of every output tensor to help debug the model.
print("Outputs len:", len(outputs))
for i, out in enumerate(outputs):
    print(f"outputs[{i}].shape = {out.shape}, min={out.min()}, max={out.max()}, mean={out.mean():.4f}")

boxes, classes, scores = post_process(outputs, scale, dx, dy)
if boxes is None:
    print("Detected 0 boxes")
else:
    draw(img, boxes, scores, classes)
    result_path = os.path.join(OUTPUT_DIR, os.path.basename(IMG_PATH))
    cv2.imwrite(result_path, img)
    print(f"Detection result saved to {result_path}")
|