# Adapted with minimal changes from the provided yolov5 post-processing example.
import cv2

import numpy as np

# OBJ_THRESH - minimum objectness / best-class score to keep a detection
# NMS_THRESH - IoU threshold for non-maximum suppression
# IMG_SIZE   - square model input resolution in pixels
OBJ_THRESH, NMS_THRESH, IMG_SIZE = 0.25, 0.45, 640

# COCO 80-class label names, indexed by class id (id 2 == "car").
# NOTE(review): several entries carry stray trailing spaces/tabs
# (e.g. "motorbike ", "laptop\t"); kept byte-identical here since they only
# affect the on-screen label text — consider cleaning them up.
CLASSES = ("person", "bicycle", "car", "motorbike ", "aeroplane ", "bus ", "train", "truck ", "boat", "traffic light",
"fire hydrant", "stop sign ", "parking meter", "bench", "bird", "cat", "dog ", "horse ", "sheep", "cow", "elephant",
"bear", "zebra ", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
"baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife ",
"spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza ", "donut", "cake", "chair", "sofa",
"pottedplant", "bed", "diningtable", "toilet ", "tvmonitor", "laptop\t", "mouse\t", "remote ", "keyboard ", "cell phone", "microwave ",
"oven ", "toaster", "sink", "refrigerator ", "book", "clock", "vase", "scissors ", "teddy bear ", "hair drier", "toothbrush ")
# ---------- helper functions kept from the original (process/filter/nms/xywh2xyxy) ----------
def xywh2xyxy(x):
    """Convert (cx, cy, w, h) boxes to (x1, y1, x2, y2) corner form."""
    cx, cy = x[:, 0], x[:, 1]
    half_w, half_h = x[:, 2] / 2, x[:, 3] / 2
    out = np.copy(x)
    out[:, 0] = cx - half_w
    out[:, 1] = cy - half_h
    out[:, 2] = cx + half_w
    out[:, 3] = cy + half_h
    return out
def process(input, mask, anchors, img_size=640):
    """Decode one YOLO output scale into absolute-pixel boxes.

    Args:
        input: (grid_h, grid_w, 3, attrs) array with attrs >= 5 + num_classes,
            laid out as [x, y, w, h, obj, cls...] per anchor.
        mask: indices selecting this scale's 3 anchors from `anchors`.
        anchors: list of (w, h) anchor pairs (unit anchors neutralize scaling).
        img_size: square network input resolution; defaults to 640, matching
            the module-level IMG_SIZE (kept as a parameter so the decode is
            reusable and testable in isolation).

    Returns:
        (boxes, box_confidence, box_class_probs) where boxes has shape
        (grid_h, grid_w, 3, 4) as (cx, cy, w, h) in input-image pixels.
    """
    anchors = np.array([anchors[i] for i in mask], dtype=np.float32)
    grid_h, grid_w = map(int, input.shape[0:2])

    # objectness kept with a trailing singleton axis for broadcasting
    box_confidence = np.expand_dims(input[..., 4], axis=-1)
    box_class_probs = input[..., 5:]

    # YOLOv5/YOLO11-style xy decode: sigmoid output stretched to (-0.5, 1.5)
    box_xy = input[..., :2] * 2 - 0.5

    # Build the per-cell offset grid. np.meshgrid handles non-square grids
    # correctly — the original tile/reshape construction only worked when
    # grid_h == grid_w and crashed otherwise.
    col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
    grid = np.stack((col, row), axis=-1).reshape(grid_h, grid_w, 1, 2)
    grid = np.repeat(grid, 3, axis=-2)  # one copy per anchor

    box_xy += grid
    box_xy *= img_size // grid_h  # stride in pixels for this scale

    # wh decode: (2 * sigmoid)^2, scaled by the anchor size
    box_wh = (input[..., 2:4] * 2) ** 2
    box_wh = box_wh * anchors  # unit (1, 1) anchors leave wh unchanged

    return np.concatenate((box_xy, box_wh), axis=-1), box_confidence, box_class_probs
def filter_boxes(boxes, box_confidences, box_class_probs, obj_thresh=None):
    """Filter decoded boxes by objectness and best-class score.

    Args:
        boxes: (..., 4) boxes; flattened to (-1, 4).
        box_confidences: objectness scores; flattened to (-1,).
        box_class_probs: per-class scores; flattened to (-1, num_classes).
        obj_thresh: score threshold; defaults to the module-level OBJ_THRESH
            (parameterized so the function is reusable with other thresholds).

    Returns:
        (boxes, classes, scores) for detections passing both thresholds,
        where scores = best class probability * objectness.
    """
    if obj_thresh is None:
        obj_thresh = OBJ_THRESH

    boxes = boxes.reshape(-1, 4)
    box_confidences = box_confidences.reshape(-1)
    box_class_probs = box_class_probs.reshape(-1, box_class_probs.shape[-1])

    # first pass: drop cells with low objectness
    _box_pos = np.where(box_confidences >= obj_thresh)
    boxes = boxes[_box_pos]
    box_confidences = box_confidences[_box_pos]
    box_class_probs = box_class_probs[_box_pos]

    # second pass: drop cells whose best class score is also below threshold
    class_max_score = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)
    _class_pos = np.where(class_max_score >= obj_thresh)

    return boxes[_class_pos], classes[_class_pos], (class_max_score * box_confidences)[_class_pos]
def nms_boxes(boxes, scores, nms_thresh=None):
    """Greedy IoU-based non-maximum suppression.

    Args:
        boxes: (N, 4) boxes in (x1, y1, x2, y2) corner form.
        scores: (N,) confidence scores.
        nms_thresh: IoU threshold above which a box is suppressed; defaults
            to the module-level NMS_THRESH (parameterized for reusability).

    Returns:
        Array of indices of the kept boxes, highest score first.
    """
    if nms_thresh is None:
        nms_thresh = NMS_THRESH

    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]

    areas = w * h
    order = scores.argsort()[::-1]  # candidate indices, best score first

    keep = []
    eps = 1e-7
    while order.size > 0:
        i = order[0]
        keep.append(i)

        # intersection of the current best box with all remaining candidates
        xx1 = np.maximum(x[i], x[order[1:]])
        yy1 = np.maximum(y[i], y[order[1:]])
        xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
        yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])

        w1 = np.maximum(0.0, xx2 - xx1 + 1e-5)
        h1 = np.maximum(0.0, yy2 - yy1 + 1e-5)
        inter = w1 * h1

        # eps guard avoids divide-by-zero on degenerate (zero-area) boxes
        denom = np.maximum(areas[i] + areas[order[1:]] - inter, eps)
        ovr = inter / denom
        inds = np.where(ovr <= nms_thresh)[0]
        order = order[inds + 1]  # +1: inds index into order[1:]
    return np.array(keep)
# ---------- key step: stitch the 9 raw outputs into the input_data format the original yolov5_post_process expects ----------
def yolov11_to_yolov5_style_input(outputs):
    """Convert 9 raw YOLO11 head tensors to yolov5-style per-scale arrays.

    Args:
        outputs: list of 9 tensors (1, C, H, W), ordered per scale as
            [reg80, cls80, obj80, reg40, cls40, obj40, reg20, cls20, obj20].

    Returns:
        List of 3 arrays, one per scale, each shaped
        (H, W, 3, 5 + num_classes). The same per-cell slice is repeated
        across the 3 anchor positions so the existing yolov5_post_process
        pipeline can be reused; unit (1, 1) anchors are used downstream so
        the repetition does not rescale box_wh.
    """
    input_data = []
    # walk the outputs three at a time: (reg, cls, obj) per scale
    for i in range(0, 9, 3):
        reg = outputs[i][0]      # (64, H, W) regression head
        cls = outputs[i + 1][0]  # (num_classes, H, W)
        obj = outputs[i + 2][0]  # (1, H, W)

        # NOTE(review): assumes the first 4 regression channels are already
        # decoded x, y, w, h per cell — confirm against the exported model.
        xywh = np.transpose(reg[0:4, :, :], (1, 2, 0))  # (H, W, 4)
        obj_hw = obj[0, :, :]                           # (H, W)
        cls_hw = np.transpose(cls, (1, 2, 0))           # (H, W, num_classes)

        # one anchor slice [x, y, w, h, obj, cls...] -> (H, W, 5 + num_classes)
        slice_hw = np.concatenate([xywh, obj_hw[..., None], cls_hw], axis=-1)

        # replicate across 3 anchors -> (H, W, 3, 5 + num_classes)
        input_data.append(np.repeat(slice_hw[:, :, None, :], 3, axis=2))

    return input_data
def yolov5_post_process_adapted(input_data):
    """
    Reuse the original yolov5_post_process pipeline, but with unit (1, 1)
    anchors so box_wh is not rescaled incorrectly.
    """
    # kept only for compatibility with the original signature; process()
    # receives the [0, 1, 2] mask directly below
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    # neutral anchors: multiplying box_wh by (1, 1) leaves it unchanged
    anchors = [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1],
               [1, 1], [1, 1], [1, 1], [1, 1]]

    boxes, classes, scores = [], [], []
    # each entry of input_data is one scale shaped (H, W, 3, 85)
    for scale in input_data:
        b, c, s = process(scale, [0, 1, 2], anchors)
        b, c, s = filter_boxes(b, c, s)
        boxes.append(b)
        classes.append(c)
        scores.append(s)

    if len(boxes) == 0:
        return None, None, None

    boxes = xywh2xyxy(np.concatenate(boxes))
    classes = np.concatenate(classes)
    scores = np.concatenate(scores)

    # class-wise NMS: suppress overlaps only within the same class
    nboxes, nclasses, nscores = [], [], []
    for cls_id in set(classes):
        inds = np.where(classes == cls_id)
        b, c, s = boxes[inds], classes[inds], scores[inds]

        keep = nms_boxes(b, s)

        nboxes.append(b[keep])
        nclasses.append(c[keep])
        nscores.append(s[keep])

    if not nclasses and not nscores:
        return None, None, None

    return np.concatenate(nboxes), np.concatenate(nclasses), np.concatenate(nscores)
||
# ---------- draw 保持原样 ----------
|
||
def draw(image, boxes, scores, classes):
|
||
for box, score, cl in zip(boxes, scores, classes):
|
||
top, left, right, bottom = box
|
||
top = int(top)
|
||
left = int(left)
|
||
cv2.rectangle(image, (top, left), (int(right), int(bottom)), (255, 0, 0), 2)
|
||
cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
|
||
(top, left - 6),
|
||
cv2.FONT_HERSHEY_SIMPLEX,
|
||
0.6, (0, 0, 255), 2)
|
||
|
||
|
||
# ---------- final myFunc (replaces the original myFunc) ----------
def myFunc(rknn_lite, IMG):
    """Run one frame through the RKNN model and return a BGR image with the
    detected cars (COCO class id 2) drawn on it."""
    # BGR -> RGB, then resize to the square model input (no letterboxing)
    IMG = cv2.cvtColor(IMG, cv2.COLOR_BGR2RGB)
    IMG = cv2.resize(IMG, (IMG_SIZE, IMG_SIZE))  # (640, 640, 3)

    # HWC -> CHW, then add the batch axis: (1, 3, 640, 640)
    IMG = np.transpose(IMG, (2, 0, 1))
    IMG_in = np.expand_dims(IMG, axis=0).astype(np.uint8)

    # inference on the NPU
    outputs = rknn_lite.inference(inputs=[IMG_in])

    if outputs is None:
        print("⚠️ Inference failed, skipping frame.")
        return cv2.cvtColor(IMG_in.squeeze().transpose(1, 2, 0), cv2.COLOR_RGB2BGR)

    # 9 raw tensors -> yolov5-style input_data -> decoded detections
    input_data = yolov11_to_yolov5_style_input(outputs)
    boxes, classes, scores = yolov5_post_process_adapted(input_data)

    # keep only cars (COCO id 2); None-out everything if no car survives
    if boxes is not None:
        keep_car = np.where(classes == 2)[0]
        if keep_car.size == 0:
            boxes, classes, scores = None, None, None
        else:
            boxes = boxes[keep_car]
            classes = classes[keep_car]
            scores = scores[keep_car]

    # back to BGR for OpenCV display
    IMG_vis = cv2.cvtColor(IMG_in.squeeze().transpose(1, 2, 0), cv2.COLOR_RGB2BGR)
    if boxes is not None:
        draw(IMG_vis, boxes, scores, classes)
    return IMG_vis