更新了calculate_diff3.0.py,一次加载rknn,加速推理

This commit is contained in:
琉璃月光
2025-12-12 16:48:37 +08:00
parent 6a6f639a9a
commit 72b5052d2e
4 changed files with 227 additions and 3 deletions

View File

@ -0,0 +1,224 @@
import cv2
import numpy as np
import os
from rknnlite.api import RKNNLite
# ====================== 配置区 ======================
MODEL_PATH = "point.rknn"
OUTPUT_DIR = "./output_rknn"
os.makedirs(OUTPUT_DIR, exist_ok=True)
# 固定参考点(像素坐标)
FIXED_REF_POINT = (535, 605)
# mm/px 缩放因子(根据标定数据填写)
width_mm = 70.0
width_px = 42
SCALE_X = width_mm / float(width_px)
height_mm = 890.0
height_px = 507
SCALE_Y = height_mm / float(height_px)
print(f"Scale factors: SCALE_X={SCALE_X:.3f} mm/px, SCALE_Y={SCALE_Y:.3f} mm/px")
# 输入尺寸
IMG_SIZE = (640, 640)
# ====================== RKNN 单例管理 ======================
_rknn_instance = None
def init_rknn_once(model_path):
"""只加载一次 RKNN 模型"""
global _rknn_instance
if _rknn_instance is None:
_rknn_instance = RKNNLite(verbose=False)
ret = _rknn_instance.load_rknn(model_path)
if ret != 0:
print(f"[ERROR] Failed to load RKNN: {ret}")
_rknn_instance = None
return None
ret = _rknn_instance.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
if ret != 0:
print(f"[ERROR] Failed to init RKNN runtime: {ret}")
_rknn_instance = None
return None
return _rknn_instance
def release_rknn():
"""释放 RKNN 单例"""
global _rknn_instance
if _rknn_instance:
_rknn_instance.release()
_rknn_instance = None
# ====================== 工具函数 ======================
def letterbox_resize(image, size, bg_color=114):
target_w, target_h = size
h, w = image.shape[:2]
scale = min(target_w / w, target_h / h)
new_w, new_h = int(w * scale), int(h * scale)
resized = cv2.resize(image, (new_w, new_h))
canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2
canvas[dy:dy + new_h, dx:dx + new_w] = resized
return canvas, scale, dx, dy
def safe_sigmoid(x):
x = np.clip(x, -50, 50)
return 1.0 / (1.0 + np.exp(-x))
def softmax(x):
x = x - np.max(x)
e = np.exp(x)
return e / e.sum()
def dfl_to_xywh(loc, grid_x, grid_y, stride):
xywh_ = np.zeros(4)
xywh = np.zeros(4)
for i in range(4):
l = loc[i * 16:(i + 1) * 16]
l = softmax(l)
xywh_[i] = sum([j * l[j] for j in range(16)])
xywh_[0] = (grid_x + 0.5) - xywh_[0]
xywh_[1] = (grid_y + 0.5) - xywh_[1]
xywh_[2] = (grid_x + 0.5) + xywh_[2]
xywh_[3] = (grid_y + 0.5) + xywh_[3]
xywh[0] = ((xywh_[0] + xywh_[2]) / 2) * stride
xywh[1] = ((xywh_[1] + xywh_[3]) / 2) * stride
xywh[2] = (xywh_[2] - xywh_[0]) * stride
xywh[3] = (xywh_[3] - xywh_[1]) * stride
xywh[0] = xywh[0] - xywh[2] / 2
xywh[1] = xywh[1] - xywh[3] / 2
return xywh
def parse_pose_outputs(outputs, conf_threshold=0.5, dx=0, dy=0, scale=1.0):
boxes = []
obj_confs = []
class_ids = []
for idx in range(3):
det = np.array(outputs[idx])[0]
C, H, W = det.shape
num_classes = C - 64
stride = 640 // H
for h in range(H):
for w in range(W):
for c in range(num_classes):
conf = safe_sigmoid(det[64 + c, h, w])
if conf >= conf_threshold:
loc = det[:64, h, w].astype(np.float32)
xywh = dfl_to_xywh(loc, w, h, stride)
boxes.append(xywh)
obj_confs.append(conf)
class_ids.append(c)
if not obj_confs:
best_box = np.array([0, 0, 0, 0])
class_id = -1
obj_conf = 0.0
else:
max_idx = np.argmax(obj_confs)
best_box = boxes[max_idx]
class_id = class_ids[max_idx]
obj_conf = obj_confs[max_idx]
x, y, w, h = best_box
x = (x - dx) / scale
y = (y - dy) / scale
w = w / scale
h = h / scale
best_box = np.array([x, y, w, h])
kpt_output = np.array(outputs[3])[0]
confs = kpt_output[:, 2, :]
best_anchor_idx = np.argmax(np.mean(confs, axis=0))
kpt_data = kpt_output[:, :, best_anchor_idx]
keypoints = []
for i in range(kpt_data.shape[0]):
x_img, y_img, vis_conf_raw = kpt_data[i]
vis_prob = safe_sigmoid(vis_conf_raw)
x_orig = (x_img - dx) / scale
y_orig = (y_img - dy) / scale
keypoints.append([x_orig, y_orig, vis_prob])
return np.array(keypoints), class_id, obj_conf, best_box
def compute_offset(keypoints, fixed_point, scale_x, scale_y):
if len(keypoints) < 2:
return None
p1, p2 = keypoints[0], keypoints[1]
cx = (p1[0] + p2[0]) / 2.0
cy = (p1[1] + p2[1]) / 2.0
dx_mm = (cx - fixed_point[0]) * scale_x
dy_mm = (cy - fixed_point[1]) * scale_y
return cx, cy, dx_mm, dy_mm
def visualize_result(image, keypoints, bbox, fixed_point, offset_info, save_path):
vis = image.copy()
colors = [(0, 0, 255), (0, 255, 255)]
cx, cy, dx_mm, dy_mm = offset_info
fx, fy = map(int, fixed_point)
for i, (x, y, conf) in enumerate(keypoints[:2]):
if conf > 0.5:
cv2.circle(vis, (int(x), int(y)), 6, colors[i], -1)
if len(keypoints) >= 2:
cv2.line(vis, (int(keypoints[0][0]), int(keypoints[0][1])),
(int(keypoints[1][0]), int(keypoints[1][1])), (0, 255, 0), 2)
x, y, w, h = bbox
cv2.rectangle(vis, (int(x), int(y)), (int(x + w), int(y + h)), (255, 0, 0), 2)
cv2.circle(vis, (int(cx), int(cy)), 10, (0, 255, 0), 3)
cv2.circle(vis, (fx, fy), 12, (255, 255, 0), 3)
cv2.arrowedLine(vis, (fx, fy), (int(cx), int(cy)), (255, 255, 0), 2, tipLength=0.05)
cv2.putText(vis, f"DeltaX={dx_mm:+.1f}mm", (fx + 30, fy - 30),
cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
cv2.putText(vis, f"DeltaY={dy_mm:+.1f}mm", (fx + 30, fy + 30),
cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
cv2.imwrite(save_path, vis)
# ====================== 主函数 ======================
def calculate_offset_from_image(image_path, visualize=False):
orig = cv2.imread(image_path)
if orig is None:
return {'success': False, 'message': f'Failed to load image: {image_path}'}
img_resized, scale, dx, dy = letterbox_resize(orig, IMG_SIZE)
infer_img = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8)
rknn = init_rknn_once(MODEL_PATH)
if rknn is None:
return {'success': False, 'message': 'Failed to init RKNN'}
try:
outputs = rknn.inference([infer_img])
except Exception as e:
return {'success': False, 'message': f'RKNN inference error: {str(e)}'}
try:
keypoints, class_id, obj_conf, bbox = parse_pose_outputs(outputs, dx=dx, dy=dy, scale=scale)
except Exception as e:
return {'success': False, 'message': f'Parse error: {str(e)}'}
offset_info = compute_offset(keypoints, FIXED_REF_POINT, SCALE_X, SCALE_Y)
if offset_info is None:
return {'success': False, 'message': 'Not enough keypoints'}
cx, cy, dx_mm, dy_mm = offset_info
if visualize:
vis_save_path = os.path.join(OUTPUT_DIR, f"result_{os.path.basename(image_path)}")
visualize_result(orig, keypoints, bbox, FIXED_REF_POINT, offset_info, vis_save_path)
return {'success': True, 'dx_mm': dx_mm, 'dy_mm': dy_mm,
'cx': cx, 'cy': cy, 'class_id': class_id,
'obj_conf': obj_conf, 'bbox': bbox,
'message': 'Success'}
# ====================== 示例调用 ======================
if __name__ == "__main__":
image_path = "11.jpg"
result = calculate_offset_from_image(image_path, visualize=False)
if result['success']:
print(f"Center point: ({result['cx']:.1f}, {result['cy']:.1f})")
print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm")
print(f"Class ID: {result['class_id']}, Confidence: {result['obj_conf']:.3f}")
print(f"BBox: {result['bbox']}")
else:
print("Error:", result['message'])