ailai_image_point_diff/ailai_pc/yolo_obb_dataset/11111.py

import os
import cv2
import numpy as np
from rknnlite.api import RKNNLite

# ====================== Configuration ======================
MODEL_PATH = "yolo11.rknn"      # path to the RKNN model file
IMG_PATH = "11.jpg"             # image to run detection on
IMG_SIZE = (640, 640)           # model input size as (w, h)
OBJ_THRESH = 0.001              # minimum combined score a candidate box needs to be kept
NMS_THRESH = 0.45               # NMS threshold (not referenced by the code visible in this file)
CLASS_NAME = ["bag"]            # single class; indexed by the predicted class id when drawing
OUTPUT_DIR = "./result"
# Create the result directory at import time so the later cv2.imwrite has a target.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ====================== 工具函数 ======================
def letterbox_resize(image, size, bg_color=114):
    """Fit ``image`` inside ``size`` without distortion and centre it on a padded canvas.

    Args:
        image: source image array; only its first two dimensions (height,
            width) are read here. The canvas is 3-channel, so the resized
            image must be assignable into an (h, w, 3) region.
        size: target ``(width, height)`` of the returned canvas.
        bg_color: fill value used for the padding area.

    Returns:
        ``(canvas, scale, dx, dy)`` where ``canvas`` is the padded uint8
        image, ``scale`` is the resize factor applied to both axes, and
        ``dx`` / ``dy`` are the left / top padding offsets in pixels.
    """
    out_w, out_h = size
    src_h, src_w = image.shape[:2]

    # One factor for both axes keeps the aspect ratio; taking the smaller
    # ratio guarantees the scaled image fits inside the target.
    scale = min(out_w / src_w, out_h / src_h)
    fit_w = int(src_w * scale)
    fit_h = int(src_h * scale)
    shrunk = cv2.resize(image, (fit_w, fit_h))

    # Split the leftover space evenly so the image sits in the middle.
    pad_left = (out_w - fit_w) // 2
    pad_top = (out_h - fit_h) // 2

    canvas = np.full((out_h, out_w, 3), bg_color, dtype=np.uint8)
    canvas[pad_top:pad_top + fit_h, pad_left:pad_left + fit_w] = shrunk
    return canvas, scale, pad_left, pad_top

def dfl_numpy(position):
    """Decode Distribution Focal Loss (DFL) logits into expected bin indices (pure NumPy).

    The channel axis is split into 4 groups of ``c // 4`` bins. A softmax is
    taken over each group's bins and the result is the probability-weighted
    mean of the bin indices ``0 .. c // 4 - 1``.

    Args:
        position: array of shape ``(n, c, h, w)``; ``c`` must be divisible
            by 4 for the reshape to succeed.

    Returns:
        float64 array of shape ``(n, 4, h, w)`` holding one expected bin
        index per group and grid cell.
    """
    n, c, h, w = position.shape
    p_num = 4
    mc = c // p_num
    # Work in float64 so the shift below cannot wrap around on integer inputs.
    logits = np.asarray(position, dtype=np.float64).reshape(n, p_num, mc, h, w)
    # Subtracting the per-distribution maximum does not change the softmax
    # mathematically, but keeps exp() from overflowing to inf (inf / inf = nan).
    logits = logits - np.max(logits, axis=2, keepdims=True)
    weights = np.exp(logits)
    probs = weights / np.sum(weights, axis=2, keepdims=True)
    # Expected value of the bin index under the softmax distribution.
    acc = np.arange(mc).reshape(1, 1, mc, 1, 1)
    return np.sum(probs * acc, axis=2)

def box_process(position):
    """Decode one branch's raw box output into xyxy corners in network-input pixels.

    Args:
        position: array of shape ``(n, c, grid_h, grid_w)`` holding the DFL
            logits for one detection branch (decoded by ``dfl_numpy``).

    Returns:
        Array of shape ``(n, 4, grid_h, grid_w)`` with channels
        ``(x1, y1, x2, y2)``: the cell centre minus the first two decoded
        distances and plus the last two, scaled by the branch stride.
    """
    grid_h, grid_w = position.shape[2:4]
    col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))
    col = col.reshape(1, 1, grid_h, grid_w)
    row = row.reshape(1, 1, grid_h, grid_w)
    # Channel 0 is the column (x) index, channel 1 the row (y) index.
    grid = np.concatenate((col, row), axis=1)
    # IMG_SIZE is (w, h). The stride must follow the same (x, y) channel order
    # as the grid: x is scaled by width / grid_w and y by height / grid_h.
    stride = np.array([IMG_SIZE[0] // grid_w, IMG_SIZE[1] // grid_h]).reshape(1, 2, 1, 1)

    position = dfl_numpy(position)
    # The +0.5 moves from the cell's corner index to its centre.
    box_xy = grid + 0.5 - position[:, 0:2, :, :]
    box_xy2 = grid + 0.5 + position[:, 2:4, :, :]
    xyxy = np.concatenate((box_xy * stride, box_xy2 * stride), axis=1)
    return xyxy

def filter_boxes(boxes, box_confidences, box_class_probs):
    """Score candidate boxes and keep those at or above ``OBJ_THRESH``.

    Args:
        boxes: array with one row per candidate box.
        box_confidences: raw confidence logits, one per candidate (any shape
            that flattens to the number of candidates); a sigmoid is applied.
        box_class_probs: raw class logits of shape ``(candidates, classes)``;
            a softmax over the last axis is applied.

    Returns:
        ``(boxes, classes, scores)`` for the surviving candidates, where
        ``classes`` is the arg-max class index and ``scores`` is the product
        of the best class probability and the sigmoid confidence.
    """
    # Sigmoid turns the confidence logits into probabilities.
    box_confidences = 1 / (1 + np.exp(-box_confidences))
    # Softmax over classes. Shifting by the row maximum leaves the result
    # unchanged mathematically but prevents exp() overflow (inf / inf = nan).
    # Note: when there is only one class channel the softmax is always 1.0,
    # so the final score equals the sigmoid confidence alone.
    shifted = box_class_probs - np.max(box_class_probs, axis=-1, keepdims=True)
    box_class_probs = np.exp(shifted)
    box_class_probs = box_class_probs / np.sum(box_class_probs, axis=-1, keepdims=True)

    box_confidences = box_confidences.reshape(-1)
    class_max_score = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)
    # Compute the combined score once and reuse it for both mask and output.
    combined = class_max_score * box_confidences
    _pos = np.where(combined >= OBJ_THRESH)
    return boxes[_pos], classes[_pos], combined[_pos]

def post_process(outputs, scale, dx, dy):
    """Turn raw network outputs into the single best box in original-image pixels.

    ``outputs`` is read as three consecutive triples: index ``3*i`` is decoded
    by ``box_process``, ``3*i + 1`` is forwarded to ``filter_boxes`` as the
    class tensor and ``3*i + 2`` as the confidence tensor. No NMS is applied;
    only the highest-scoring candidate is kept.

    Args:
        outputs: indexable collection of at least 9 arrays shaped (n, c, h, w).
        scale: resize factor that was applied by the letterbox step.
        dx: left padding added by the letterbox step, in pixels.
        dy: top padding added by the letterbox step, in pixels.

    Returns:
        ``(boxes, classes, scores)``, each with exactly one entry, or
        ``(None, None, None)`` when no candidate passes the score filter.
    """
    def to_rows(feature_map):
        # (n, c, h, w) -> (n*h*w, c): one row per grid cell.
        channels = feature_map.shape[1]
        return feature_map.transpose(0, 2, 3, 1).reshape(-1, channels)

    branch_count = 3
    box_maps, class_maps, score_maps = [], [], []
    for first in range(0, branch_count * 3, 3):
        box_maps.append(box_process(outputs[first]))
        class_maps.append(outputs[first + 1])
        score_maps.append(outputs[first + 2])  # use the real network output

    # Stack every branch into one flat candidate list per tensor kind.
    flat_boxes = np.concatenate(list(map(to_rows, box_maps)))
    flat_classes = np.concatenate(list(map(to_rows, class_maps)))
    flat_scores = np.concatenate(list(map(to_rows, score_maps)))

    boxes, classes, scores = filter_boxes(flat_boxes, flat_scores, flat_classes)
    if not boxes.shape[0]:
        return None, None, None

    # Keep only the highest-scoring box (as a length-1 slice, not a scalar).
    top = np.argmax(scores)
    best = slice(top, top + 1)
    boxes, classes, scores = boxes[best], classes[best], scores[best]

    # Undo the letterbox: remove the padding offset, then the resize factor.
    boxes[:, 0::2] -= dx  # x1, x2
    boxes[:, 1::2] -= dy  # y1, y2
    boxes /= scale

    return boxes.clip(min=0), classes, scores

def draw(image, boxes, scores, classes):
    """Draw each box and its ``"<class name> <score>"`` label onto ``image`` in place.

    Args:
        image: image array passed to the OpenCV drawing calls; modified in place.
        boxes: iterable of ``(x1, y1, x2, y2)`` coordinates (truncated to int).
        scores: iterable of scores, formatted with three decimals.
        classes: iterable of indices into ``CLASS_NAME``.
    """
    box_color = (255, 0, 0)    # blue if the image is BGR (OpenCV's default order)
    text_color = (0, 0, 255)   # red if the image is BGR
    for corners, score, class_id in zip(boxes, scores, classes):
        left, top, right, bottom = map(int, corners)
        cv2.rectangle(image, (left, top), (right, bottom), box_color, 2)
        label = f"{CLASS_NAME[class_id]} {score:.3f}"
        # The label is placed 5 px above the box's top-left corner.
        cv2.putText(image, label, (left, top - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, text_color, 2)

# ====================== Main flow ======================
# Load the image, run it through the RKNN model once, and save the annotated result.
img = cv2.imread(IMG_PATH)
if img is None:
    raise ValueError(f"Image {IMG_PATH} not found!")

img_resized, scale, dx, dy = letterbox_resize(img, IMG_SIZE)
# NOTE(review): cv2.imread returns BGR; confirm the model was converted to
# accept BGR input, otherwise a BGR->RGB conversion is needed here.
input_data = np.expand_dims(img_resized, 0)  # add a batch axis -> 4-D input

rknn = RKNNLite(verbose=False)
try:
    # load_rknn / init_runtime report failure through a non-zero return code
    # rather than an exception, so check them explicitly.
    if rknn.load_rknn(MODEL_PATH) != 0:
        raise RuntimeError(f"Failed to load RKNN model {MODEL_PATH}")
    if rknn.init_runtime() != 0:
        raise RuntimeError("Failed to initialise the RKNN runtime")
    outputs = rknn.inference([input_data])
finally:
    # Always free the runtime, even when loading or inference fails.
    rknn.release()

if outputs is None:
    raise RuntimeError("RKNN inference returned no outputs")

# Debug dump of every output tensor's shape and value range.
print("Outputs len:", len(outputs))
for i, out in enumerate(outputs):
    print(f"outputs[{i}].shape = {out.shape}, min={out.min()}, max={out.max()}, mean={out.mean():.4f}")

boxes, classes, scores = post_process(outputs, scale, dx, dy)
if boxes is None:
    print("Detected 0 boxes")
else:
    draw(img, boxes, scores, classes)
    result_path = os.path.join(OUTPUT_DIR, os.path.basename(IMG_PATH))
    cv2.imwrite(result_path, img)
    print(f"Detection result saved to {result_path}")
rknn替换，板子是3568的 2025-11-03 16:10:50 +08:00			`import os`
			`import cv2`
			`import numpy as np`
			`from rknnlite.api import RKNNLite`

			`# ====================== 配置 ======================`
			`MODEL_PATH = "yolo11.rknn" # RKNN 模型路径`
			`IMG_PATH = "11.jpg" # 待检测图片`
			`IMG_SIZE = (640, 640) # 模型输入尺寸 (w,h)`
			`OBJ_THRESH = 0.001 # 目标置信度阈值`
			`NMS_THRESH = 0.45 # NMS 阈值`
			`CLASS_NAME = ["bag"] # 单类别`
			`OUTPUT_DIR = "./result"`
			`os.makedirs(OUTPUT_DIR, exist_ok=True)`

			`# ====================== 工具函数 ======================`
			`def letterbox_resize(image, size, bg_color=114):`
			`target_w, target_h = size`
			`h, w = image.shape[:2]`
			`scale = min(target_w / w, target_h / h)`
			`new_w, new_h = int(w * scale), int(h * scale)`
			`resized = cv2.resize(image, (new_w, new_h))`
			`canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)`
			`dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2`
			`canvas[dy:dy + new_h, dx:dx + new_w] = resized`
			`return canvas, scale, dx, dy`

			`def dfl_numpy(position):`
			`"""Distribution Focal Loss 解析，纯 NumPy 版本"""`
			`n, c, h, w = position.shape`
			`p_num = 4`
			`mc = c // p_num`
			`y = position.reshape(n, p_num, mc, h, w)`
			`y = np.exp(y) / np.sum(np.exp(y), axis=2, keepdims=True)`
			`acc = np.arange(mc).reshape(1,1,mc,1,1)`
			`y = np.sum(y * acc, axis=2)`
			`return y`

			`def box_process(position):`
			`"""解析网络输出的框坐标"""`
			`grid_h, grid_w = position.shape[2:4]`
			`col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h))`
			`col = col.reshape(1,1,grid_h,grid_w)`
			`row = row.reshape(1,1,grid_h,grid_w)`
			`grid = np.concatenate((col,row), axis=1)`
			`stride = np.array([IMG_SIZE[1] // grid_h, IMG_SIZE[0] // grid_w]).reshape(1,2,1,1)`

			`position = dfl_numpy(position)`
			`box_xy = grid + 0.5 - position[:,0:2,:,:]`
			`box_xy2 = grid + 0.5 + position[:,2:4,:,:]`
			`xyxy = np.concatenate((box_xy*stride, box_xy2*stride), axis=1)`
			`return xyxy`

			`def filter_boxes(boxes, box_confidences, box_class_probs):`
			`# sigmoid objectness`
			`box_confidences = 1 / (1 + np.exp(-box_confidences))`
			`# softmax class probs`
			`box_class_probs = np.exp(box_class_probs)`
			`box_class_probs /= np.sum(box_class_probs, axis=-1, keepdims=True)`

			`box_confidences = box_confidences.reshape(-1)`
			`class_max_score = np.max(box_class_probs, axis=-1)`
			`classes = np.argmax(box_class_probs, axis=-1)`
			`_pos = np.where(class_max_score * box_confidences >= OBJ_THRESH)`
			`boxes = boxes[_pos]`
			`classes = classes[_pos]`
			`scores = (class_max_score * box_confidences)[_pos]`
			`return boxes, classes, scores`

			`def post_process(outputs, scale, dx, dy):`
			`boxes, classes_conf, scores = [], [], []`
			`branch_num = 3`
			`for i in range(branch_num):`
			`boxes.append(box_process(outputs[i*3]))`
			`classes_conf.append(outputs[i*3+1])`
			`scores.append(outputs[i*3+2]) # 使用真实 class 输出`

			`def sp_flatten(x):`
			`ch = x.shape[1]`
			`x = x.transpose(0,2,3,1)`
			`return x.reshape(-1,ch)`

			`boxes = np.concatenate([sp_flatten(b) for b in boxes])`
			`classes_conf = np.concatenate([sp_flatten(c) for c in classes_conf])`
			`scores = np.concatenate([sp_flatten(s) for s in scores])`

			`boxes, classes, scores = filter_boxes(boxes, scores, classes_conf)`

			`if boxes.shape[0] == 0:`
			`return None, None, None`

			`# 只保留置信度最高的框`
			`max_idx = np.argmax(scores)`
			`boxes = boxes[max_idx:max_idx+1]`
			`classes = classes[max_idx:max_idx+1]`
			`scores = scores[max_idx:max_idx+1]`

			`# 映射回原图`
			`boxes[:, [0,2]] -= dx`
			`boxes[:, [1,3]] -= dy`
			`boxes /= scale`
			`boxes = boxes.clip(min=0)`

			`return boxes, classes, scores`

			`def draw(image, boxes, scores, classes):`
			`for box, score, cl in zip(boxes, scores, classes):`
			`x1, y1, x2, y2 = [int(b) for b in box]`
			`cv2.rectangle(image, (x1, y1), (x2, y2), (255,0,0), 2)`
			`cv2.putText(image, f"{CLASS_NAME[cl]} {score:.3f}", (x1, y1-5),`
			`cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,0,255), 2)`

			`# ====================== 主流程 ======================`
			`img = cv2.imread(IMG_PATH)`
			`if img is None:`
			`raise ValueError(f"Image {IMG_PATH} not found!")`

			`img_resized, scale, dx, dy = letterbox_resize(img, IMG_SIZE)`
			`input_data = np.expand_dims(img_resized, 0) # 4 维输入`

			`rknn = RKNNLite(verbose=False)`
			`rknn.load_rknn(MODEL_PATH)`
			`rknn.init_runtime()`
			`outputs = rknn.inference([input_data])`
			`rknn.release()`

			`print("Outputs len:", len(outputs))`
			`for i, out in enumerate(outputs):`
			`print(f"outputs[{i}].shape = {out.shape}, min={out.min()}, max={out.max()}, mean={out.mean():.4f}")`

			`boxes, classes, scores = post_process(outputs, scale, dx, dy)`
			`if boxes is None:`
			`print("Detected 0 boxes")`
			`else:`
			`draw(img, boxes, scores, classes)`
			`result_path = os.path.join(OUTPUT_DIR, os.path.basename(IMG_PATH))`
			`cv2.imwrite(result_path, img)`
			`print(f"Detection result saved to {result_path}")`