diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..10b731c --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,5 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 基于编辑器的 HTTP 客户端请求 +/httpRequests/ diff --git a/.idea/ailai_image_obb.iml b/.idea/ailai_image_obb.iml new file mode 100644 index 0000000..8770519 --- /dev/null +++ b/.idea/ailai_image_obb.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..adde6f5 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..9c3175b --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..8306744 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/ailai_pc/1.jpg b/ailai_pc/1.jpg new file mode 100644 index 0000000..e69faee Binary files /dev/null and b/ailai_pc/1.jpg differ diff --git a/ailai_pc/2.jpg b/ailai_pc/2.jpg new file mode 100644 index 0000000..e6af4b6 Binary files /dev/null and b/ailai_pc/2.jpg differ diff --git a/ailai_pc/22222.jpg b/ailai_pc/22222.jpg new file mode 100644 index 0000000..b76910c Binary files /dev/null and b/ailai_pc/22222.jpg differ diff --git a/ailai_pc/3.jpg b/ailai_pc/3.jpg new file mode 100644 index 0000000..59b3873 Binary files /dev/null and b/ailai_pc/3.jpg differ diff --git a/ailai_pc/best.pt b/ailai_pc/best.pt index a9e6a58..70b8aa6 100644 Binary files a/ailai_pc/best.pt and b/ailai_pc/best.pt differ diff --git 
class Yolo11Detector:
    """Single-class YOLO detector that runs an RKNN model through RKNNLite.

    The pipeline is: letterbox the BGR frame to ``input_size`` x ``input_size``,
    run inference, then decode and NMS-filter the raw predictions.
    """

    def __init__(self, model_path):
        """Load the RKNN model and start the runtime on NPU core 0.

        Args:
            model_path: Path to the ``.rknn`` model file.

        Raises:
            RuntimeError: If the model cannot be loaded or the runtime cannot
                be initialised.
        """
        self.rknn = RKNNLite(verbose=False)

        # Explicit checks instead of ``assert``: asserts are stripped when
        # Python runs with -O, which would silently skip these validations.
        ret = self.rknn.load_rknn(model_path)
        if ret != 0:
            raise RuntimeError("❌ Load RKNN model failed")

        ret = self.rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
        if ret != 0:
            self.rknn.release()
            raise RuntimeError("❌ Init runtime failed")

        # Side length of the square model input.
        self.input_size = 640
        # Anchor table keyed by stride. It is not referenced by any method of
        # this class; kept because external code may read the attribute.
        self.anchors = {
            8: [[10, 13], [16, 30], [33, 23]],
            16: [[30, 61], [62, 45], [59, 119]],
            32: [[116, 90], [156, 198], [373, 326]],
        }

    def preprocess(self, img):
        """Letterbox a BGR image into the model input tensor.

        Args:
            img: BGR image as an ``(H, W, 3)`` uint8 array.

        Returns:
            Tuple ``(tensor, scale, dw, dh)`` where ``tensor`` is a
            ``(1, input_size, input_size, 3)`` uint8 RGB array, ``scale`` is
            the resize factor and ``dw``/``dh`` are the left/top padding.
        """
        h, w = img.shape[:2]
        scale = min(self.input_size / w, self.input_size / h)
        # Clamp to 1 px so extreme aspect ratios never ask for a 0-sized resize.
        new_w = max(1, int(w * scale))
        new_h = max(1, int(h * scale))
        img_resized = cv2.resize(img, (new_w, new_h))

        canvas = np.full((self.input_size, self.input_size, 3), 114, dtype=np.uint8)
        dw = (self.input_size - new_w) // 2
        dh = (self.input_size - new_h) // 2
        canvas[dh:dh + new_h, dw:dw + new_w, :] = img_resized

        img_rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
        return np.expand_dims(img_rgb, 0).astype(np.uint8), scale, dw, dh

    @staticmethod
    def _nms(boxes, scores, iou_thresh):
        """Greedy NMS over ``xyxy`` boxes; returns kept indices, best first."""
        order = np.argsort(scores)[::-1]
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        keep = []
        while order.size > 0:
            best = order[0]
            keep.append(best)
            rest = order[1:]
            if rest.size == 0:
                break
            xx1 = np.maximum(boxes[best, 0], boxes[rest, 0])
            yy1 = np.maximum(boxes[best, 1], boxes[rest, 1])
            xx2 = np.minimum(boxes[best, 2], boxes[rest, 2])
            yy2 = np.minimum(boxes[best, 3], boxes[rest, 3])
            inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
            iou = inter / (areas[best] + areas[rest] - inter + 1e-6)
            order = rest[iou < iou_thresh]
        return keep

    def postprocess(self, outputs, scale, dw, dh, conf_thresh=0.25, iou_thresh=0.45):
        """Decode raw predictions into boxes in original-image coordinates.

        Each prediction row is read as ``[cx, cy, w, h, obj_conf, cls_conf]``
        and ``outputs[0]`` is treated as ``[1, N, C]``.
        NOTE(review): confirm this matches the exported model; many YOLO
        exports are laid out ``[1, C, N]`` and have no objectness column.

        Args:
            outputs: Sequence of model output arrays; only ``outputs[0]`` is used.
            scale: Resize factor returned by ``preprocess``.
            dw: Left padding returned by ``preprocess``.
            dh: Top padding returned by ``preprocess``.
            conf_thresh: Minimum objectness and minimum final score.
            iou_thresh: IoU above which a lower-scored box is suppressed.

        Returns:
            List of dicts with keys ``"box"`` (``[x1, y1, x2, y2]``),
            ``"score"`` and ``"class_id"``, sorted by descending score.

        Raises:
            ValueError: If prediction rows have fewer than 6 columns.
        """
        preds = np.asarray(outputs[0], dtype=np.float32)
        # Rows are the LAST axis of [1, N, C]. The previous code used shape[1]
        # (= N), which split the tensor into rows of the wrong width.
        preds = preds.reshape(-1, preds.shape[-1])
        if preds.shape[1] < 6:
            raise ValueError(
                f"expected at least 6 values per prediction, got {preds.shape[1]}"
            )

        obj_conf = preds[:, 4]
        all_scores = obj_conf * preds[:, 5]  # single-class model
        mask = (obj_conf >= conf_thresh) & (all_scores >= conf_thresh)
        if not mask.any():
            return []

        preds = preds[mask]
        scores = all_scores[mask]

        # Centre/size -> corners, then undo the letterbox padding and scaling.
        half = preds[:, 2:4] / 2
        boxes = np.concatenate([preds[:, :2] - half, preds[:, :2] + half], axis=1)
        boxes[:, [0, 2]] = (boxes[:, [0, 2]] - dw) / scale
        boxes[:, [1, 3]] = (boxes[:, [1, 3]] - dh) / scale

        class_ids = np.zeros(len(boxes), dtype=np.int64)  # single class

        return [
            {"box": boxes[i], "score": scores[i], "class_id": class_ids[i]}
            for i in self._nms(boxes, scores, iou_thresh)
        ]

    def detect(self, img):
        """Run preprocess, inference and postprocess on one BGR image."""
        img_data, scale, dw, dh = self.preprocess(img)
        outputs = self.rknn.inference([img_data])
        return self.postprocess(outputs, scale, dw, dh)

    def release(self):
        """Release the RKNN runtime."""
        self.rknn.release()


if __name__ == "__main__":
    detector = Yolo11Detector(MODEL_PATH)
    cap = cv2.VideoCapture(0)  # a file path can be passed instead of a camera index

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            for r in detector.detect(frame):
                x1, y1, x2, y2 = map(int, r["box"])
                cls_id = r["class_id"]
                score = r["score"]
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f"{CLASS_NAMES[cls_id]} {score:.2f}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

            cv2.imshow("YOLOv11 Detection", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the NPU, the camera and the window, even after an error.
        detector.release()
        cap.release()
        cv2.destroyAllWindows()
def main():
    """Run the YOLO ``.pt`` model on ``IMG_PATH`` and save the annotated image.

    Reads the module-level configuration (``MODEL_PATH``, ``IMG_PATH``,
    ``OUTPUT_PATH``, ``CONF_THRESH``, ``CLASS_NAMES``, ``SHOW_IMAGE``), prints
    every detection, draws it on the image and writes the result to
    ``OUTPUT_PATH``.

    Raises:
        FileNotFoundError: If ``IMG_PATH`` cannot be read as an image.
        OSError: If the annotated image cannot be written to ``OUTPUT_PATH``.
    """
    # Read the image first so a bad path fails before the model is loaded.
    img = cv2.imread(IMG_PATH)
    if img is None:
        raise FileNotFoundError(f"无法读取图像: {IMG_PATH}")

    # Use CUDA when available.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"✅ 使用设备: {device}")

    # Load the model.
    print("➡️ 加载 YOLO 模型...")
    model = YOLO(MODEL_PATH)
    model.to(device)

    # Inference.
    print("➡️ 开始推理...")
    results = model(IMG_PATH, imgsz=640, conf=CONF_THRESH, device=device)

    # Only one image was passed, so only the first result is used.
    r = results[0]

    print("\n📋 检测结果:")
    for box in r.boxes:
        xyxy = box.xyxy[0].cpu().numpy()        # [x1, y1, x2, y2]
        conf = box.conf.cpu().numpy()[0]        # confidence
        cls_id = int(box.cls.cpu().numpy()[0])  # class id
        # Fall back to the numeric id when the weights report a class that
        # CLASS_NAMES does not list, instead of raising IndexError.
        cls_name = CLASS_NAMES[cls_id] if 0 <= cls_id < len(CLASS_NAMES) else str(cls_id)

        x1, y1, x2, y2 = map(int, xyxy)
        print(f"  类别: {cls_name}, 置信度: {conf:.3f}, 框: [{x1}, {y1}, {x2}, {y2}]")

        # Draw the box and its label.
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        label = f"{cls_name} {conf:.2f}"
        cv2.putText(img, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    # cv2.imwrite reports failure (e.g. missing directory) via its return
    # value; check it so "saved" is never printed for a file that was not written.
    if not cv2.imwrite(OUTPUT_PATH, img):
        raise OSError(f"无法保存图像: {OUTPUT_PATH}")
    print(f"\n🖼️ 可视化结果已保存: {OUTPUT_PATH}")

    # Optional preview window.
    if SHOW_IMAGE:
        cv2.imshow("YOLOv8 Detection", img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()
0000000..d935dfa Binary files /dev/null and b/ailai_pc/output_pt.jpg differ diff --git a/ailai_pc/yolo_obb_dataset/1.jpg b/ailai_pc/yolo_obb_dataset/1.jpg new file mode 100644 index 0000000..fd8a6cc Binary files /dev/null and b/ailai_pc/yolo_obb_dataset/1.jpg differ diff --git a/main/README.md b/main/README.md index e53edcc..418fd8b 100644 --- a/main/README.md +++ b/main/README.md @@ -26,25 +26,50 @@ pip install opencv-python numpy rknnlite ``` -## 函数调用 +## 函数调用1.0 您也可以直接调用 calculate_offset_from_image 函数,以便集成到其他项目中: 示例 1: 仅获取偏移量(不画图) -** + +```python from calculate_offset import calculate_offset_from_image result = calculate_offset_from_image("your_image_path.jpg", visualize=False) if result['success']: print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm") else: print("Error:", result['message']) -** +``` 示例 2: 获取偏移量并保存可视化图 -** + +```python from calculate_offset import calculate_offset_from_image result = calculate_offset_from_image("your_image_path.jpg", visualize=True) -** +``` -该函数返回一个包含下列字段的字典: +## 函数调用2.0 + +示例 1: 仅获取偏移量(不画图) + +```python +from caculate_diff2.0 import calculate_offset_from_image + +result = calculate_offset_from_image("11.jpg", visualize=False) +if result['success']: + print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm") +else: + print("Error:", result['message']) + +``` +示例 2: 获取偏移量并保存可视化图 + +```python +from caculate_diff2.0 import calculate_offset_from_image + +result = calculate_offset_from_image("11.jpg", visualize=True) + +``` + +## 该函数返回一个包含下列字段的字典1.0: success: 成功标志(True/False) dx_mm: 水平偏移(毫米) @@ -52,3 +77,18 @@ result = calculate_offset_from_image("your_image_path.jpg", visualize=True) cx: 中心点 x 坐标(像素) cy: 中心点 y 坐标(像素) message: 错误信息或成功提示 + +## 该函数返回一个包含下列字段的字典2.0: + + success: 成功标志(True/False) + dx_mm: 水平偏移(毫米) + dy_mm: 垂直偏移(毫米) + cx: 中心点 x 坐标(像素) + cy: 中心点 y 坐标(像素) + message: 错误信息或成功提示 + class_id: 检测类别 ID #这里是bag的id是0 + obj_conf: 检测置信度 #这就是识别为料袋的置信度 + bbox: 检测矩形框 [x_left, y_top, 
_DFL_BINS = 16  # number of distribution bins per box side in the DFL head


def letterbox_resize(image, size, bg_color=114):
    """Resize ``image`` to fit ``size`` keeping aspect ratio, padding the rest.

    Args:
        image: Image array of shape ``(H, W, 3)``.
        size: Target ``(width, height)``.
        bg_color: Value used to fill the padding.

    Returns:
        Tuple ``(canvas, scale, dx, dy)``: the padded uint8 image, the resize
        factor, and the left/top padding in pixels.
    """
    target_w, target_h = size
    h, w = image.shape[:2]
    scale = min(target_w / w, target_h / h)
    # Clamp to 1 px so extreme aspect ratios never ask for a 0-sized resize.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    resized = cv2.resize(image, (new_w, new_h))

    canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8)
    dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2
    canvas[dy:dy + new_h, dx:dx + new_w] = resized
    return canvas, scale, dx, dy


def safe_sigmoid(x):
    """Return the logistic sigmoid of ``x``, clipping to [-50, 50] first."""
    x = np.clip(x, -50, 50)
    return 1.0 / (1.0 + np.exp(-x))


def softmax(x, axis=None):
    """Return a numerically stable softmax of ``x``.

    Args:
        x: Input array.
        axis: Axis to normalise over; ``None`` (default) normalises over all
            elements.
    """
    x = np.asarray(x)
    e = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e / np.sum(e, axis=axis, keepdims=True)


def dfl_to_xywh(loc, grid_x, grid_y, stride):
    """Decode one cell's DFL logits into a ``[left, top, width, height]`` box.

    Args:
        loc: At least ``4 * 16`` logits; each group of 16 is the distribution
            for the left, top, right and bottom distance, in that order.
        grid_x: Column index of the cell.
        grid_y: Row index of the cell.
        stride: Pixels per grid cell.

    Returns:
        Float array ``[left, top, width, height]`` in model-input pixels.
    """
    logits = np.asarray(loc, dtype=np.float32)[:4 * _DFL_BINS].reshape(4, _DFL_BINS)
    # Expected bin index of each distribution = distance in grid units.
    dist = softmax(logits, axis=1) @ np.arange(_DFL_BINS, dtype=np.float32)
    d_left, d_top, d_right, d_bottom = (float(d) for d in dist)

    x1 = (grid_x + 0.5) - d_left
    y1 = (grid_y + 0.5) - d_top
    x2 = (grid_x + 0.5) + d_right
    y2 = (grid_y + 0.5) + d_bottom
    return np.array([x1 * stride, y1 * stride, (x2 - x1) * stride, (y2 - y1) * stride])


def parse_pose_outputs(outputs, conf_threshold=0.5, dx=0, dy=0, scale=1.0,
                       input_size=640):
    """Parse RKNN YOLO-Pose outputs into keypoints and the best detection.

    ``outputs[0:3]`` are read as detection heads shaped ``(1, C, H, W)``,
    where the first 64 channels are DFL box logits and the remaining channels
    are class logits. ``outputs[3]`` is indexed as
    ``(1, num_kpts, 3, num_anchors)`` holding ``(x, y, visibility logit)``.

    Args:
        outputs: Sequence of at least four model output arrays.
        conf_threshold: Minimum class confidence for a detection.
        dx: Left letterbox padding to remove.
        dy: Top letterbox padding to remove.
        scale: Letterbox resize factor to undo.
        input_size: Side length of the square model input; used to derive each
            head's stride as ``input_size // H``.

    Returns:
        Tuple ``(keypoints, class_id, obj_conf, bbox)``. ``keypoints`` is a
        ``(num_kpts, 3)`` array of ``[x, y, visibility]`` in original-image
        pixels. ``bbox`` is ``[left, top, width, height]`` in original-image
        pixels. When no class confidence reaches ``conf_threshold`` the result
        has ``class_id == -1``, ``obj_conf == 0.0`` and an all-zero ``bbox``.

    Raises:
        ValueError: If fewer than four output tensors are supplied.
    """
    if outputs is None or len(outputs) < 4:
        raise ValueError("expected 3 detection tensors followed by 1 keypoint tensor")

    box_channels = 4 * _DFL_BINS
    best = None  # (conf, class_id, dfl_logits, grid_x, grid_y, stride)

    for idx in range(3):
        det = np.asarray(outputs[idx])[0]  # (C, H, W)
        channels, grid_h, grid_w = det.shape
        num_classes = channels - box_channels
        if num_classes <= 0:
            continue
        stride = input_size // grid_h

        cls_conf = safe_sigmoid(det[box_channels:].astype(np.float32))
        # Flatten in (h, w, class) order so that argmax resolves ties exactly
        # like a row-major scan over cells, then classes.
        flat = np.moveaxis(cls_conf, 0, -1).reshape(-1)
        pos = int(np.argmax(flat))
        conf = flat[pos]
        if conf < conf_threshold:
            continue
        # Strict '>' keeps the earliest head on ties.
        if best is not None and conf <= best[0]:
            continue

        cell, cls = divmod(pos, num_classes)
        gy, gx = divmod(cell, grid_w)
        best = (conf, cls, det[:box_channels, gy, gx], gx, gy, stride)

    if best is None:
        # Nothing detected: leave the placeholder box at zero instead of
        # pushing it through the inverse letterbox transform, which would
        # produce meaningless negative coordinates.
        class_id = -1
        obj_conf = 0.0
        best_box = np.zeros(4)
    else:
        obj_conf, class_id, loc, gx, gy, stride = best
        left, top, w, h = dfl_to_xywh(loc, gx, gy, stride)
        # Map the box back to original-image coordinates.
        best_box = np.array([(left - dx) / scale, (top - dy) / scale, w / scale, h / scale])

    # Keypoints: pick the anchor whose mean raw visibility logit is highest.
    # NOTE(review): this choice is independent of the box selected above, so
    # the keypoints may belong to a different anchor than the returned bbox.
    # Confirm against the exported model's anchor layout before coupling them.
    kpt_output = np.asarray(outputs[3])[0]
    best_anchor_idx = int(np.argmax(np.mean(kpt_output[:, 2, :], axis=0)))
    kpt = kpt_output[:, :, best_anchor_idx].astype(np.float32)

    keypoints = np.stack(
        [(kpt[:, 0] - dx) / scale, (kpt[:, 1] - dy) / scale, safe_sigmoid(kpt[:, 2])],
        axis=1,
    )
    return keypoints, class_id, obj_conf, best_box
def compute_offset(keypoints, fixed_point, scale_x, scale_y):
    """Return the midpoint of the first two keypoints and its offset in mm.

    Args:
        keypoints: Sequence of ``[x, y, ...]`` entries in pixels.
        fixed_point: Reference ``(x, y)`` in pixels.
        scale_x: Horizontal mm-per-pixel factor.
        scale_y: Vertical mm-per-pixel factor.

    Returns:
        ``(cx, cy, dx_mm, dy_mm)``, or ``None`` when fewer than two keypoints
        are given.
    """
    if len(keypoints) < 2:
        return None
    first, second = keypoints[0], keypoints[1]
    cx = (first[0] + second[0]) / 2.0
    cy = (first[1] + second[1]) / 2.0
    dx_mm = (cx - fixed_point[0]) * scale_x
    dy_mm = (cy - fixed_point[1]) * scale_y
    return cx, cy, dx_mm, dy_mm


def visualize_result(image, keypoints, bbox, fixed_point, offset_info, save_path):
    """Draw keypoints, bbox, reference point and offset, then save the image.

    Args:
        image: BGR image to annotate; a copy is drawn on.
        keypoints: Sequence of ``[x, y, confidence]`` entries in pixels.
        bbox: ``[left, top, width, height]`` in pixels.
        fixed_point: Reference ``(x, y)`` in pixels.
        offset_info: ``(cx, cy, dx_mm, dy_mm)`` as returned by ``compute_offset``.
        save_path: Destination file path.

    Returns:
        The value returned by ``cv2.imwrite`` (falsy when the write failed).
    """
    vis = image.copy()
    colors = [(0, 0, 255), (0, 255, 255)]
    cx, cy, dx_mm, dy_mm = offset_info
    fx, fy = map(int, fixed_point)

    # Keypoints: only those with confidence above 0.5 get a dot.
    for i, (x, y, conf) in enumerate(keypoints[:2]):
        if conf > 0.5:
            cv2.circle(vis, (int(x), int(y)), 6, colors[i], -1)
    if len(keypoints) >= 2:
        cv2.line(vis,
                 (int(keypoints[0][0]), int(keypoints[0][1])),
                 (int(keypoints[1][0]), int(keypoints[1][1])),
                 (0, 255, 0), 2)

    # Bounding box: skipped when it has no area, so a placeholder box is not drawn.
    x, y, w, h = bbox
    if w > 0 and h > 0:
        cv2.rectangle(vis, (int(x), int(y)), (int(x + w), int(y + h)), (255, 0, 0), 2)

    # Measured centre, reference point and the arrow between them.
    cv2.circle(vis, (int(cx), int(cy)), 10, (0, 255, 0), 3)
    cv2.circle(vis, (fx, fy), 12, (255, 255, 0), 3)
    cv2.arrowedLine(vis, (fx, fy), (int(cx), int(cy)), (255, 255, 0), 2, tipLength=0.05)
    cv2.putText(vis, f"DeltaX={dx_mm:+.1f}mm", (fx + 30, fy - 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
    cv2.putText(vis, f"DeltaY={dy_mm:+.1f}mm", (fx + 30, fy + 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)

    return cv2.imwrite(save_path, vis)


def calculate_offset_from_image(image_path, visualize=False):
    """Run the pose model on one image and report the offset from the reference.

    Args:
        image_path: Path of the image to analyse.
        visualize: When true, also write an annotated copy to ``OUTPUT_DIR``.

    Returns:
        On success, a dict with ``success=True``, ``dx_mm``, ``dy_mm``, ``cx``,
        ``cy``, ``class_id``, ``obj_conf``, ``bbox`` and ``message``.
        On failure, a dict with ``success=False`` and ``message``.
    """
    orig = cv2.imread(image_path)
    if orig is None:
        return {'success': False, 'message': f'Failed to load image: {image_path}'}

    img_resized, scale, dx, dy = letterbox_resize(orig, IMG_SIZE)
    # BGR -> RGB, add the batch axis.
    infer_img = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8)

    rknn = RKNNLite(verbose=False)
    try:
        # Everything after construction sits inside try/finally so the handle
        # is released on every exit path, including the early returns below.
        if rknn.load_rknn(MODEL_PATH) != 0:
            return {'success': False, 'message': 'Failed to load RKNN model'}
        if rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0) != 0:
            return {'success': False, 'message': 'Failed to init RKNN runtime'}
        outputs = rknn.inference([infer_img])
    finally:
        rknn.release()

    if outputs is None:
        return {'success': False, 'message': 'RKNN inference returned no outputs'}

    try:
        keypoints, class_id, obj_conf, bbox = parse_pose_outputs(outputs, dx=dx, dy=dy, scale=scale)
    except Exception as e:
        return {'success': False, 'message': f'Parse error: {str(e)}'}

    offset_info = compute_offset(keypoints, FIXED_REF_POINT, SCALE_X, SCALE_Y)
    if offset_info is None:
        return {'success': False, 'message': 'Not enough keypoints'}

    cx, cy, dx_mm, dy_mm = offset_info

    if visualize:
        vis_save_path = os.path.join(OUTPUT_DIR, f"result_{os.path.basename(image_path)}")
        saved = visualize_result(orig, keypoints, bbox, FIXED_REF_POINT, offset_info, vis_save_path)
        # ``saved`` is False when cv2.imwrite failed; the offset itself is
        # still valid, so only warn.
        if saved is False:
            print(f"Warning: failed to write visualization to {vis_save_path}")

    return {'success': True, 'dx_mm': dx_mm, 'dy_mm': dy_mm,
            'cx': cx, 'cy': cy, 'class_id': class_id,
            'obj_conf': obj_conf, 'bbox': bbox,
            'message': 'Success'}


# ====================== Usage example ======================
if __name__ == "__main__":
    image_path = "11.jpg"
    result = calculate_offset_from_image(image_path, visualize=True)

    if result['success']:
        print(f"Center point: ({result['cx']:.1f}, {result['cy']:.1f})")
        print(f"Offset: DeltaX={result['dx_mm']:+.2f} mm, DeltaY={result['dy_mm']:+.2f} mm")
        print(f"Class ID: {result['class_id']}, Confidence: {result['obj_conf']:.3f}")
        print(f"BBox: {result['bbox']}")
    else:
        print("Error:", result['message'])