Compare commits
2 Commits
a6505573b9
...
d3a5cbfad0
| Author | SHA1 | Date | |
|---|---|---|---|
| d3a5cbfad0 | |||
| 1ec9bbab60 |
5
.idea/.gitignore
generated
vendored
Normal file
5
.idea/.gitignore
generated
vendored
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# 默认忽略的文件
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# 基于编辑器的 HTTP 客户端请求
|
||||||
|
/httpRequests/
|
||||||
12
.idea/ailai_image_obb.iml
generated
Normal file
12
.idea/ailai_image_obb.iml
generated
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$" />
|
||||||
|
<orderEntry type="jdk" jdkName="yolov11" jdkType="Python SDK" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
<component name="PyDocumentationSettings">
|
||||||
|
<option name="format" value="PLAIN" />
|
||||||
|
<option name="myDocStringFormat" value="Plain" />
|
||||||
|
</component>
|
||||||
|
</module>
|
||||||
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
||||||
7
.idea/misc.xml
generated
Normal file
7
.idea/misc.xml
generated
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="Black">
|
||||||
|
<option name="sdkName" value="Python 3.10" />
|
||||||
|
</component>
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="yolov11" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
||||||
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/ailai_image_obb.iml" filepath="$PROJECT_DIR$/.idea/ailai_image_obb.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
7
.idea/vcs.xml
generated
Normal file
7
.idea/vcs.xml
generated
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="" vcs="Git" />
|
||||||
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
BIN
ailai_pc/1.jpg
Normal file
BIN
ailai_pc/1.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 333 KiB |
BIN
ailai_pc/2.jpg
Normal file
BIN
ailai_pc/2.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 860 KiB |
BIN
ailai_pc/22222.jpg
Normal file
BIN
ailai_pc/22222.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 733 KiB |
BIN
ailai_pc/3.jpg
Normal file
BIN
ailai_pc/3.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 725 KiB |
BIN
ailai_pc/best.pt
BIN
ailai_pc/best.pt
Binary file not shown.
BIN
ailai_pc/best1.pt
Normal file
BIN
ailai_pc/best1.pt
Normal file
Binary file not shown.
134
ailai_pc/detect.py
Normal file
134
ailai_pc/detect.py
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
from rknnlite.api import RKNNLite
|
||||||
|
|
||||||
|
# Path to the compiled RKNN detection model.
MODEL_PATH = "detect.rknn"
# Class names in training order; this model detects a single class.
CLASS_NAMES = ["bag"]  # single class
|
||||||
|
|
||||||
|
|
||||||
|
class Yolo11Detector:
    """YOLOv11 single-class detector running on a Rockchip NPU via RKNNLite.

    Loads an ``.rknn`` model, letterboxes BGR frames to the model input,
    runs NPU inference and decodes boxes with a simple greedy NMS.
    """

    def __init__(self, model_path):
        """Load the RKNN model and initialise the NPU runtime.

        Args:
            model_path: path to the compiled ``.rknn`` model file.
        """
        self.rknn = RKNNLite(verbose=False)

        # Load the RKNN model.
        ret = self.rknn.load_rknn(model_path)
        assert ret == 0, "❌ Load RKNN model failed"

        # Initialise the runtime, pinned to NPU core 0.
        ret = self.rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
        assert ret == 0, "❌ Init runtime failed"

        # Square letterbox input size expected by the model.
        self.input_size = 640
        # YOLO anchors per stride — must match the trained model.
        # NOTE(review): not used by postprocess() below; kept for compatibility.
        self.anchors = {
            8: [[10, 13], [16, 30], [33, 23]],
            16: [[30, 61], [62, 45], [59, 119]],
            32: [[116, 90], [156, 198], [373, 326]]
        }

    def preprocess(self, img):
        """Letterbox-resize a BGR image to the model input and convert to RGB.

        Args:
            img: HxWx3 BGR image (OpenCV layout).

        Returns:
            Tuple ``(tensor, scale, dw, dh)`` — batched uint8 NHWC tensor,
            the resize scale, and the left/top letterbox paddings in pixels.
        """
        h, w = img.shape[:2]
        scale = min(self.input_size / w, self.input_size / h)
        new_w, new_h = int(w * scale), int(h * scale)
        img_resized = cv2.resize(img, (new_w, new_h))
        # Pad with the conventional YOLO grey value 114.
        canvas = np.full((self.input_size, self.input_size, 3), 114, dtype=np.uint8)
        dw, dh = (self.input_size - new_w) // 2, (self.input_size - new_h) // 2
        canvas[dh:dh + new_h, dw:dw + new_w, :] = img_resized
        img_rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
        return np.expand_dims(img_rgb, 0).astype(np.uint8), scale, dw, dh

    def postprocess(self, outputs, scale, dw, dh, conf_thresh=0.25, iou_thresh=0.45):
        """Decode raw model output into detections in original-image coordinates.

        Assumes ``outputs[0]`` has shape ``[1, N, C]`` with rows
        ``[cx, cy, w, h, obj_conf, cls_conf]`` (single-class model).

        Args:
            outputs: raw inference outputs from RKNNLite.
            scale, dw, dh: letterbox parameters returned by preprocess().
            conf_thresh: minimum objectness / combined score.
            iou_thresh: IoU threshold for greedy NMS.

        Returns:
            List of dicts with keys ``"box"`` ([x1, y1, x2, y2]),
            ``"score"`` and ``"class_id"``; empty list if nothing passes.
        """
        # BUGFIX: flatten to rows of the per-detection channel count.  The
        # original used outputs[0].shape[1], which for a [1, N, C] tensor is
        # the detection count N, producing garbage rows; shape[-1] is C.
        preds = outputs[0].reshape(-1, outputs[0].shape[-1])
        boxes, scores, class_ids = [], [], []

        for p in preds:
            conf = p[4]
            if conf < conf_thresh:
                continue
            cls_conf = p[5]  # single-class model: one class score
            score = conf * cls_conf
            if score < conf_thresh:
                continue
            # Undo letterbox padding and scaling back to the original image.
            cx, cy, w, h = p[:4]
            x1 = (cx - w / 2 - dw) / scale
            y1 = (cy - h / 2 - dh) / scale
            x2 = (cx + w / 2 - dw) / scale
            y2 = (cy + h / 2 - dh) / scale
            boxes.append([x1, y1, x2, y2])
            scores.append(score)
            class_ids.append(0)  # single class

        if len(boxes) == 0:
            return []

        boxes = np.array(boxes)
        scores = np.array(scores)
        class_ids = np.array(class_ids)

        # Greedy NMS over score-sorted boxes.
        idxs = np.argsort(scores)[::-1]
        keep = []
        while len(idxs) > 0:
            i = idxs[0]
            keep.append(i)
            if len(idxs) == 1:
                break
            x1, y1, x2, y2 = boxes[i]
            xx1 = np.maximum(x1, boxes[idxs[1:], 0])
            yy1 = np.maximum(y1, boxes[idxs[1:], 1])
            xx2 = np.minimum(x2, boxes[idxs[1:], 2])
            yy2 = np.minimum(y2, boxes[idxs[1:], 3])
            inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
            area_i = (x2 - x1) * (y2 - y1)
            area_j = (boxes[idxs[1:], 2] - boxes[idxs[1:], 0]) * (boxes[idxs[1:], 3] - boxes[idxs[1:], 1])
            iou = inter / (area_i + area_j - inter + 1e-6)
            # Drop every remaining box that overlaps the kept one too much.
            idxs = idxs[1:][iou < iou_thresh]

        results = []
        for i in keep:
            results.append({
                "box": boxes[i],
                "score": scores[i],
                "class_id": class_ids[i]
            })
        return results

    def detect(self, img):
        """Full pipeline: preprocess -> NPU inference -> decode."""
        img_data, scale, dw, dh = self.preprocess(img)
        outputs = self.rknn.inference([img_data])
        results = self.postprocess(outputs, scale, dw, dh)
        return results

    def release(self):
        """Free the NPU runtime resources."""
        self.rknn.release()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Live demo: detect on camera frames until 'q' is pressed.
    detector = Yolo11Detector(MODEL_PATH)
    cap = cv2.VideoCapture(0)  # camera index; may also be an image/video path

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        results = detector.detect(frame)

        # Draw every detection on the frame.
        for r in results:
            x1, y1, x2, y2 = map(int, r["box"])
            cls_id = r["class_id"]
            score = r["score"]
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{CLASS_NAMES[cls_id]} {score:.2f}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        cv2.imshow("YOLOv11 Detection", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    detector.release()
    cap.release()
    # ROBUSTNESS: close the preview window on exit (was missing).
    cv2.destroyAllWindows()
|
||||||
72
ailai_pc/detet_pc.py
Normal file
72
ailai_pc/detet_pc.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
# detect_pt.py
|
||||||
|
import cv2
|
||||||
|
import torch
|
||||||
|
from ultralytics import YOLO
|
||||||
|
|
||||||
|
# ======================
# Configuration
# ======================
MODEL_PATH = 'best.pt'  # trained model weights (yolov8n.pt or your own)
# IMG_PATH = '/home/hx/开发/ailai_image_obb/ailai_pc/train/192.168.0.234_01_202510141514352.jpg'  # test image path
IMG_PATH = '1.jpg'
OUTPUT_PATH = '/home/hx/开发/ailai_image_obb/ailai_pc/output_pt.jpg'  # annotated-result output path
CONF_THRESH = 0.5  # confidence threshold
CLASS_NAMES = ['bag']  # class names, in training order

# Show a preview window (requires a GUI-capable PC)
SHOW_IMAGE = True
|
||||||
|
|
||||||
|
# ======================
|
||||||
|
# 主函数
|
||||||
|
# ======================
|
||||||
|
def main():
    """Run single-image inference with a YOLO ``.pt`` model and visualise it.

    Loads MODEL_PATH, predicts on IMG_PATH, prints every detection,
    draws boxes/labels on the image, writes it to OUTPUT_PATH and
    optionally shows a preview window.
    """
    # Prefer GPU when available.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"✅ 使用设备: {device}")

    # Load architecture + weights in one call.
    print("➡️ 加载 YOLO 模型...")
    model = YOLO(MODEL_PATH)
    model.to(device)

    # Inference on a single image.
    print("➡️ 开始推理...")
    results = model(IMG_PATH, imgsz=640, conf=CONF_THRESH, device=device)

    # First (and only) image's results.
    r = results[0]

    # Original BGR image used for drawing.
    img = cv2.imread(IMG_PATH)
    if img is None:
        raise FileNotFoundError(f"无法读取图像: {IMG_PATH}")

    print("\n📋 检测结果:")
    for box in r.boxes:
        xyxy = box.xyxy[0].cpu().numpy()        # [x1, y1, x2, y2]
        conf = box.conf.cpu().numpy()[0]        # confidence
        cls_id = int(box.cls.cpu().numpy()[0])  # class id
        cls_name = CLASS_NAMES[cls_id]          # class name

        x1, y1, x2, y2 = map(int, xyxy)
        print(f" 类别: {cls_name}, 置信度: {conf:.3f}, 框: [{x1}, {y1}, {x2}, {y2}]")

        # Box + label overlay.
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        label = f"{cls_name} {conf:.2f}"
        cv2.putText(img, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    # Persist the annotated image.
    cv2.imwrite(OUTPUT_PATH, img)
    print(f"\n🖼️ 可视化结果已保存: {OUTPUT_PATH}")

    # Optional on-screen preview.
    if SHOW_IMAGE:
        cv2.imshow("YOLOv8 Detection", img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()
|
||||||
BIN
ailai_pc/output_pt.jpg
Normal file
BIN
ailai_pc/output_pt.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 586 KiB |
BIN
ailai_pc/yolo_obb_dataset/1.jpg
Normal file
BIN
ailai_pc/yolo_obb_dataset/1.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 392 KiB |
@ -26,7 +26,7 @@
|
|||||||
pip install opencv-python numpy rknnlite
|
pip install opencv-python numpy rknnlite
|
||||||
```
|
```
|
||||||
|
|
||||||
## 函数调用
|
## 函数调用1.0
|
||||||
|
|
||||||
您也可以直接调用 calculate_offset_from_image 函数,以便集成到其他项目中:
|
您也可以直接调用 calculate_offset_from_image 函数,以便集成到其他项目中:
|
||||||
示例 1: 仅获取偏移量(不画图)
|
示例 1: 仅获取偏移量(不画图)
|
||||||
@ -46,11 +46,53 @@ from calculate_offset import calculate_offset_from_image
|
|||||||
result = calculate_offset_from_image("your_image_path.jpg", visualize=True)
|
result = calculate_offset_from_image("your_image_path.jpg", visualize=True)
|
||||||
```
|
```
|
||||||
|
|
||||||
该函数返回一个包含下列字段的字典:
|
## 函数调用2.0
|
||||||
|
|
||||||
|
示例 1: 仅获取偏移量(不画图)
|
||||||
|
|
||||||
|
```python
|
||||||
|
from caculate_diff2.0 import calculate_offset_from_image
|
||||||
|
|
||||||
|
result = calculate_offset_from_image("11.jpg", visualize=False)
|
||||||
|
if result['success']:
|
||||||
|
print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm")
|
||||||
|
else:
|
||||||
|
print("Error:", result['message'])
|
||||||
|
|
||||||
|
```
|
||||||
|
示例 2: 获取偏移量并保存可视化图
|
||||||
|
|
||||||
|
```python
|
||||||
|
from caculate_diff2.0 import calculate_offset_from_image
|
||||||
|
|
||||||
|
result = calculate_offset_from_image("11.jpg", visualize=True)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
## 该函数返回一个包含下列字段的字典1.0:
|
||||||
|
|
||||||
success: 成功标志(True/False)
|
success: 成功标志(True/False)
|
||||||
dx_mm: 水平偏移(毫米)
|
dx_mm: 水平偏移(毫米)
|
||||||
dy_mm: 垂直偏移(毫米)
|
dy_mm: 垂直偏移(毫米)
|
||||||
cx: 中心点 x 坐标(像素)
|
cx: 中心点 x 坐标(像素)
|
||||||
cy: 中心点 y 坐标(像素)
|
cy: 中心点 y 坐标(像素)
|
||||||
message: 错误信息或成功提示
|
message: 错误信息或成功提示

## 该函数返回一个包含下列字段的字典2.0:

success: 成功标志(True/False)
dx_mm: 水平偏移(毫米)
dy_mm: 垂直偏移(毫米)
cx: 中心点 x 坐标(像素)
cy: 中心点 y 坐标(像素)
message: 错误信息或成功提示
class_id: 检测类别 ID #这里是bag的id是0
obj_conf: 检测置信度 #这就是识别为料袋的置信度
bbox: 检测矩形框 [x_left, y_top, width, height]
|
||||||
|
|||||||
235
main/caculate_diff2.0.py
Normal file
235
main/caculate_diff2.0.py
Normal file
@ -0,0 +1,235 @@
|
|||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
from rknnlite.api import RKNNLite
|
||||||
|
|
||||||
|
# ====================== Configuration ======================
MODEL_PATH = "point.rknn"
OUTPUT_DIR = "./output_rknn"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Fixed reference point (pixel coordinates).
FIXED_REF_POINT = (535, 605)

# mm/px scale factors — fill in from calibration measurements.
width_mm = 70.0
width_px = 42
SCALE_X = width_mm / float(width_px)
height_mm = 890.0
height_px = 507
SCALE_Y = height_mm / float(height_px)
print(f"Scale factors: SCALE_X={SCALE_X:.3f} mm/px, SCALE_Y={SCALE_Y:.3f} mm/px")

# Model input size.
IMG_SIZE = (640, 640)
|
||||||
|
|
||||||
|
|
||||||
|
def letterbox_resize(image, size, bg_color=114):
    """Resize an image to fit inside `size` preserving aspect ratio (letterbox).

    Args:
        image: HxWx3 BGR image.
        size: (target_w, target_h) of the output canvas.
        bg_color: padding grey value (YOLO convention is 114).

    Returns:
        Tuple ``(canvas, scale, dx, dy)``: the padded image, the resize
        scale, and the left/top padding offsets in pixels.
    """
    target_w, target_h = size
    h, w = image.shape[:2]
    scale = min(target_w / w, target_h / h)
    new_w, new_h = int(w * scale), int(h * scale)
    resized = cv2.resize(image, (new_w, new_h))
    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
    # Centre the resized image on the canvas.
    dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2
    canvas[dy:dy + new_h, dx:dx + new_w] = resized
    return canvas, scale, dx, dy
|
||||||
|
|
||||||
|
|
||||||
|
def safe_sigmoid(x):
    """Numerically safe logistic sigmoid.

    Clips the input to [-50, 50] before exponentiation so ``np.exp``
    cannot overflow for large-magnitude logits.
    """
    x = np.clip(x, -50, 50)
    return 1.0 / (1.0 + np.exp(-x))
|
||||||
|
|
||||||
|
|
||||||
|
def softmax(x):
    """Softmax over all elements, with max-subtraction for numerical stability."""
    x = x - np.max(x)
    e = np.exp(x)
    return e / e.sum()
|
||||||
|
|
||||||
|
|
||||||
|
def dfl_to_xywh(loc, grid_x, grid_y, stride):
    """Decode a 64-channel DFL box prediction into ``[x, y, w, h]``.

    Args:
        loc: 64 logits — 4 box sides x 16 distribution bins each.
        grid_x, grid_y: cell indices in the feature map.
        stride: feature-map stride in input pixels.

    Returns:
        Box as ``[x_left, y_top, width, height]`` in input-image pixels.
    """
    xywh_ = np.zeros(4)
    xywh = np.zeros(4)
    bins = np.arange(16, dtype=np.float64)

    # Expected value of each side's 16-bin softmax distribution.
    # (np.dot replaces the original Python-level sum comprehension.)
    for i in range(4):
        l = softmax(loc[i * 16:(i + 1) * 16])
        xywh_[i] = float(np.dot(bins, l))

    # Distances from the cell centre to the four box edges.
    xywh_[0] = (grid_x + 0.5) - xywh_[0]
    xywh_[1] = (grid_y + 0.5) - xywh_[1]
    xywh_[2] = (grid_x + 0.5) + xywh_[2]
    xywh_[3] = (grid_y + 0.5) + xywh_[3]

    # Convert to centre + size, scaled to input pixels.
    xywh[0] = ((xywh_[0] + xywh_[2]) / 2) * stride
    xywh[1] = ((xywh_[1] + xywh_[3]) / 2) * stride
    xywh[2] = (xywh_[2] - xywh_[0]) * stride
    xywh[3] = (xywh_[3] - xywh_[1]) * stride

    # Centre -> top-left corner.
    xywh[0] = xywh[0] - xywh[2] / 2
    xywh[1] = xywh[1] - xywh[3] / 2
    return xywh
|
||||||
|
|
||||||
|
|
||||||
|
def parse_pose_outputs(outputs, conf_threshold=0.5, dx=0, dy=0, scale=1.0):
    """Parse raw RKNN YOLO-Pose outputs into original-image coordinates.

    Args:
        outputs: list of 4 tensors — three detection heads, each (1, C, H, W)
            with the first 64 channels being DFL bbox bins, followed by one
            keypoint head of shape (1, num_kpts, 3, num_anchors).
        conf_threshold: class-confidence cut-off for candidate boxes.
        dx, dy, scale: letterbox offsets/scale used to map coordinates back.

    Returns:
        ``(keypoints, class_id, obj_conf, bbox)`` — keypoints as an (N, 3)
        array of ``[x, y, visibility]``, and the single best box as
        ``[x_left, y_top, w, h]``, all mapped back to the original image.
        ``class_id`` is -1 and ``obj_conf`` 0.0 when nothing passes.
    """
    boxes = []
    obj_confs = []
    class_ids = []

    # The first three output tensors are the detection heads.
    for idx in range(3):
        det = np.array(outputs[idx])[0]  # (C, H, W)
        C, H, W = det.shape
        num_classes = C - 64  # first 64 channels are DFL bbox bins
        stride = 640 // H

        # PERF: sigmoid over all class channels at once instead of one
        # Python-level call per (h, w, c) element; same values, same
        # candidate order as the original scalar loop.
        conf_map = safe_sigmoid(det[64:, :, :])

        for h in range(H):
            for w in range(W):
                for c in range(num_classes):
                    conf = conf_map[c, h, w]
                    if conf >= conf_threshold:
                        loc = det[:64, h, w].astype(np.float32)
                        xywh = dfl_to_xywh(loc, w, h, stride)
                        boxes.append(xywh)
                        obj_confs.append(conf)
                        class_ids.append(c)

    if not obj_confs:
        best_box = np.array([0, 0, 0, 0])
        class_id = -1
        obj_conf = 0.0
    else:
        # Keep only the single most confident detection.
        max_idx = np.argmax(obj_confs)
        best_box = boxes[max_idx]
        class_id = class_ids[max_idx]
        obj_conf = obj_confs[max_idx]

    # Map the bbox back to original-image coordinates (undo letterbox).
    x, y, w, h = best_box
    x = (x - dx) / scale
    y = (y - dy) / scale
    w = w / scale
    h = h / scale
    best_box = np.array([x, y, w, h])

    # Keypoint head: pick the anchor with the highest mean keypoint confidence.
    kpt_output = np.array(outputs[3])[0]  # (num_kpts, 3, num_anchors)
    confs = kpt_output[:, 2, :]
    best_anchor_idx = np.argmax(np.mean(confs, axis=0))
    kpt_data = kpt_output[:, :, best_anchor_idx]

    keypoints = []
    for i in range(kpt_data.shape[0]):
        x_img, y_img, vis_conf_raw = kpt_data[i]
        vis_prob = safe_sigmoid(vis_conf_raw)
        # Undo letterboxing for each keypoint.
        x_orig = (x_img - dx) / scale
        y_orig = (y_img - dy) / scale
        keypoints.append([x_orig, y_orig, vis_prob])

    return np.array(keypoints), class_id, obj_conf, best_box
|
||||||
|
|
||||||
|
|
||||||
|
def compute_offset(keypoints, fixed_point, scale_x, scale_y):
    """Midpoint of the first two keypoints and its offset from a reference.

    Args:
        keypoints: sequence of ``[x, y, ...]`` points (pixels).
        fixed_point: (x, y) reference point in pixels.
        scale_x, scale_y: mm-per-pixel calibration factors.

    Returns:
        ``(cx, cy, dx_mm, dy_mm)`` — midpoint in pixels and offset in
        millimetres, or ``None`` when fewer than two keypoints exist.
    """
    if len(keypoints) < 2:
        return None
    p1, p2 = keypoints[0], keypoints[1]
    cx = (p1[0] + p2[0]) / 2.0
    cy = (p1[1] + p2[1]) / 2.0
    dx_mm = (cx - fixed_point[0]) * scale_x
    dy_mm = (cy - fixed_point[1]) * scale_y
    return cx, cy, dx_mm, dy_mm
|
||||||
|
|
||||||
|
|
||||||
|
def visualize_result(image, keypoints, bbox, fixed_point, offset_info, save_path):
    """Draw keypoints, bbox, midpoint and offset arrow; save the image.

    Args:
        image: original BGR image (a copy is drawn on; input not modified).
        keypoints: (N, 3) array of ``[x, y, visibility]``.
        bbox: ``[x_left, y_top, w, h]`` in image pixels.
        fixed_point: (x, y) reference point in pixels.
        offset_info: ``(cx, cy, dx_mm, dy_mm)`` from compute_offset().
        save_path: output image file path.
    """
    vis = image.copy()
    colors = [(0, 0, 255), (0, 255, 255)]  # BGR colours for the two keypoints
    cx, cy, dx_mm, dy_mm = offset_info
    fx, fy = map(int, fixed_point)

    # Keypoints (only when confidently visible) and their connecting line.
    for i, (x, y, conf) in enumerate(keypoints[:2]):
        if conf > 0.5:
            cv2.circle(vis, (int(x), int(y)), 6, colors[i], -1)
    if len(keypoints) >= 2:
        cv2.line(vis,
                 (int(keypoints[0][0]), int(keypoints[0][1])),
                 (int(keypoints[1][0]), int(keypoints[1][1])),
                 (0, 255, 0), 2)

    # Detection bbox.
    x, y, w, h = bbox
    cv2.rectangle(vis, (int(x), int(y)), (int(x + w), int(y + h)), (255, 0, 0), 2)

    # Midpoint, reference point, and the offset arrow between them.
    cv2.circle(vis, (int(cx), int(cy)), 10, (0, 255, 0), 3)
    cv2.circle(vis, (fx, fy), 12, (255, 255, 0), 3)
    cv2.arrowedLine(vis, (fx, fy), (int(cx), int(cy)), (255, 255, 0), 2, tipLength=0.05)
    cv2.putText(vis, f"DeltaX={dx_mm:+.1f}mm", (fx + 30, fy - 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
    cv2.putText(vis, f"DeltaY={dy_mm:+.1f}mm", (fx + 30, fy + 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)

    cv2.imwrite(save_path, vis)
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_offset_from_image(image_path, visualize=False):
    """Detect two keypoints in an image and compute their midpoint's offset
    from FIXED_REF_POINT in millimetres.

    Args:
        image_path: path to the input image.
        visualize: when True, save an annotated image under OUTPUT_DIR.

    Returns:
        Dict with ``'success'`` and ``'message'`` plus, on success,
        ``'dx_mm'``, ``'dy_mm'``, ``'cx'``, ``'cy'``, ``'class_id'``,
        ``'obj_conf'`` and ``'bbox'``.
    """
    orig = cv2.imread(image_path)
    if orig is None:
        return {'success': False, 'message': f'Failed to load image: {image_path}'}

    # Letterbox to model input; BGR -> RGB; batched NHWC uint8.
    img_resized, scale, dx, dy = letterbox_resize(orig, IMG_SIZE)
    infer_img = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8)

    rknn = RKNNLite(verbose=False)
    ret = rknn.load_rknn(MODEL_PATH)
    if ret != 0:
        return {'success': False, 'message': 'Failed to load RKNN model'}

    try:
        # ROBUSTNESS: the runtime-init return code was previously ignored;
        # fail fast instead of crashing later inside inference().
        ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
        if ret != 0:
            return {'success': False, 'message': 'Failed to init RKNN runtime'}
        outputs = rknn.inference([infer_img])
    finally:
        rknn.release()

    try:
        keypoints, class_id, obj_conf, bbox = parse_pose_outputs(outputs, dx=dx, dy=dy, scale=scale)
    except Exception as e:
        return {'success': False, 'message': f'Parse error: {str(e)}'}

    offset_info = compute_offset(keypoints, FIXED_REF_POINT, SCALE_X, SCALE_Y)
    if offset_info is None:
        return {'success': False, 'message': 'Not enough keypoints'}

    cx, cy, dx_mm, dy_mm = offset_info

    if visualize:
        vis_save_path = os.path.join(OUTPUT_DIR, f"result_{os.path.basename(image_path)}")
        visualize_result(orig, keypoints, bbox, FIXED_REF_POINT, offset_info, vis_save_path)

    return {'success': True, 'dx_mm': dx_mm, 'dy_mm': dy_mm,
            'cx': cx, 'cy': cy, 'class_id': class_id,
            'obj_conf': obj_conf, 'bbox': bbox,
            'message': 'Success'}
|
||||||
|
|
||||||
|
|
||||||
|
# ====================== 使用示例 ======================
|
||||||
|
if __name__ == "__main__":
    # Usage example: compute and print the offset for a single image.
    image_path = "11.jpg"
    result = calculate_offset_from_image(image_path, visualize=True)

    if result['success']:
        print(f"Center point: ({result['cx']:.1f}, {result['cy']:.1f})")
        print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm")
        print(f"Class ID: {result['class_id']}, Confidence: {result['obj_conf']:.3f}")
        print(f"BBox: {result['bbox']}")
    else:
        print("Error:", result['message'])
|
||||||
Reference in New Issue
Block a user