diff --git a/ailai_pc/best1.pt b/ailai_pc/11.pt similarity index 100% rename from ailai_pc/best1.pt rename to ailai_pc/11.pt diff --git a/ailai_pc/22222.jpg b/ailai_pc/22222.jpg deleted file mode 100644 index b76910c..0000000 Binary files a/ailai_pc/22222.jpg and /dev/null differ diff --git a/ailai_pc/best.pt b/ailai_pc/best12.pt similarity index 100% rename from ailai_pc/best.pt rename to ailai_pc/best12.pt diff --git a/ailai_pc/detect.pt b/ailai_pc/detect.pt new file mode 100644 index 0000000..d57daa1 Binary files /dev/null and b/ailai_pc/detect.pt differ diff --git a/ailai_pc/detect1.pt b/ailai_pc/detect1.pt new file mode 100644 index 0000000..70b8aa6 Binary files /dev/null and b/ailai_pc/detect1.pt differ diff --git a/ailai_pc/detect2.pt b/ailai_pc/detect2.pt new file mode 100644 index 0000000..7096ab5 Binary files /dev/null and b/ailai_pc/detect2.pt differ diff --git a/ailai_pc/detet_pc.py b/ailai_pc/detet_pc.py index b9abdef..ea56eb8 100644 --- a/ailai_pc/detet_pc.py +++ b/ailai_pc/detet_pc.py @@ -1,72 +1,76 @@ -# detect_pt.py -import cv2 -import torch from ultralytics import YOLO +from ultralytics.utils.ops import non_max_suppression +import torch +import cv2 # ====================== # 配置参数 # ====================== -MODEL_PATH = 'best.pt' # 你的训练模型路径(yolov8n.pt 或你自己训练的) -#IMG_PATH = '/home/hx/开发/ailai_image_obb/ailai_pc/train/192.168.0.234_01_202510141514352.jpg' # 测试图像路径 +MODEL_PATH = '/home/hx/开发/ailai_image_obb/ailai_pc/best12.pt' IMG_PATH = '1.jpg' -OUTPUT_PATH = '/home/hx/开发/ailai_image_obb/ailai_pc/output_pt.jpg' # 可视化结果保存路径 -CONF_THRESH = 0.5 # 置信度阈值 -CLASS_NAMES = ['bag'] # 你的类别名列表(按训练时顺序) - -# 是否显示窗口(适合有 GUI 的 PC) -SHOW_IMAGE = True +OUTPUT_PATH = 'output_pt.jpg' +CONF_THRESH = 0.5 +IOU_THRESH = 0.45 +CLASS_NAMES = ['bag'] # ====================== -# 主函数 +# 主函数(优化版) # ====================== def main(): - # 检查 CUDA device = 'cuda' if torch.cuda.is_available() else 'cpu' print(f"✅ 使用设备: {device}") # 加载模型 - print("➡️ 加载 YOLO 模型...") - model = YOLO(MODEL_PATH) # 自动加载架构和权重 + model = YOLO(MODEL_PATH) model.to(device) - # 推理 - print("➡️ 开始推理...") - results = model(IMG_PATH, imgsz=640, conf=CONF_THRESH, device=device) + # 推理:获取原始结果(不立即解析) + print("➡️ 开始推理...") + results = model(IMG_PATH, imgsz=640, conf=CONF_THRESH, device=device, verbose=True) # 获取第一张图的结果 r = results[0] - # 获取原始图像(BGR) + # 🚀 关键:使用原始 tensor 在 GPU 上处理 + # pred: [x1, y1, x2, y2, conf, cls] 形状为 [num_boxes, 6] + pred = r.boxes.data # 已经在 GPU 上,类型: torch.Tensor + + # 🔍 在 GPU 上做 NMS(这才是正确姿势) + # 注意:non_max_suppression 输入是 [batch, num_boxes, 6] + det = non_max_suppression( + pred.unsqueeze(0), # 增加 batch 维度 + conf_thres=CONF_THRESH, + iou_thres=IOU_THRESH, + classes=None, + agnostic=False, + max_det=100 + )[0] # 取第一个(也是唯一一个)batch + + # ✅ 此时所有后处理已完成,现在才从 GPU 拷贝到 CPU + if det is not None and len(det): + det = det.cpu().numpy() # ← 只拷贝一次! + else: + det = [] + + # 读取图像 img = cv2.imread(IMG_PATH) if img is None: raise FileNotFoundError(f"无法读取图像: {IMG_PATH}") print("\n📋 检测结果:") - for box in r.boxes: - # 获取数据 - xyxy = box.xyxy[0].cpu().numpy() # [x1, y1, x2, y2] - conf = box.conf.cpu().numpy()[0] # 置信度 - cls_id = int(box.cls.cpu().numpy()[0]) # 类别 ID - cls_name = CLASS_NAMES[cls_id] # 类别名 - + for *xyxy, conf, cls_id in det: x1, y1, x2, y2 = map(int, xyxy) + cls_name = CLASS_NAMES[int(cls_id)] print(f" 类别: {cls_name}, 置信度: {conf:.3f}, 框: [{x1}, {y1}, {x2}, {y2}]") - # 画框 + # 画框和标签 cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2) - # 画标签 label = f"{cls_name} {conf:.2f}" cv2.putText(img, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2) # 保存结果 cv2.imwrite(OUTPUT_PATH, img) - print(f"\n🖼️ 可视化结果已保存: {OUTPUT_PATH}") - - # 显示(可选) - if SHOW_IMAGE: - cv2.imshow("YOLOv8 Detection", img) - cv2.waitKey(0) - cv2.destroyAllWindows() + print(f"\n🖼️ 可视化结果已保存: {OUTPUT_PATH}") if __name__ == '__main__': main() \ No newline at end of file diff --git a/ailai_pc/detet_pc_f.py b/ailai_pc/detet_pc_f.py new file mode 100644 index 0000000..f2c7658 --- /dev/null +++ b/ailai_pc/detet_pc_f.py @@ -0,0 +1,135 @@ +from ultralytics import YOLO +from ultralytics.utils.ops import non_max_suppression +import torch +import cv2 +import os +import time +from pathlib import Path + +# ====================== +# 配置参数 +# ====================== +MODEL_PATH = 'detect.pt' # 你的模型路径 +INPUT_FOLDER = '/home/hx/开发/ailai_image_obb/ailai_pc/train' # 输入图片文件夹 +OUTPUT_FOLDER = '/home/hx/开发/ailai_image_obb/ailai_pc/results' # 输出结果文件夹(自动创建) +CONF_THRESH = 0.5 +IOU_THRESH = 0.45 +CLASS_NAMES = ['bag'] +DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' +IMG_SIZE = 640 +SHOW_IMAGE = False # 是否逐张显示图像(适合调试) + +# 支持的图像格式 +IMG_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'} + + +# ====================== +# 获取文件夹中所有图片路径 +# ====================== +def get_image_paths(folder): + folder = Path(folder) + if not folder.exists(): + raise FileNotFoundError(f"输入文件夹不存在: {folder}") + paths = [p for p in folder.iterdir() if p.suffix.lower() in IMG_EXTENSIONS] + if not paths: + print(f"⚠️ 在 {folder} 中未找到图片") + return sorted(paths) # 按名称排序 + + +# ====================== +# 主函数(批量推理) +# ====================== +def main(): + print(f"✅ 使用设备: {DEVICE}") + + # 创建输出文件夹 + os.makedirs(OUTPUT_FOLDER, exist_ok=True) + print(f"📁 输出结果将保存到: {OUTPUT_FOLDER}") + + # 加载模型 + print("➡️ 加载 YOLO 模型...") + model = YOLO(MODEL_PATH) + model.to(DEVICE) + + # 获取图片列表 + img_paths = get_image_paths(INPUT_FOLDER) + if not img_paths: + return + + print(f"📸 共找到 {len(img_paths)} 张图片,开始批量推理...\n") + + total_start_time = time.time() + + for idx, img_path in enumerate(img_paths, 1): + print(f"{'=' * 50}") + print(f"🖼️ 处理第 {idx}/{len(img_paths)} 张: {img_path.name}") + + # 手动计时 + start_time = time.time() + + # 推理(verbose=True 输出内部耗时) + results = model(str(img_path), imgsz=IMG_SIZE, conf=CONF_THRESH, device=DEVICE, verbose=True) + inference_time = time.time() - start_time + + # 获取结果 + r = results[0] + pred = r.boxes.data # GPU 上的原始输出 + + # 在 GPU 上做 NMS + det = non_max_suppression( + pred.unsqueeze(0), + conf_thres=CONF_THRESH, + iou_thres=IOU_THRESH, + classes=None, + agnostic=False, + max_det=100 + )[0] + + # 拷贝到 CPU(仅一次) + if det is not None and len(det): + det = det.cpu().numpy() + else: + det = [] + + # 读取图像并绘制 + img = cv2.imread(str(img_path)) + if img is None: + print(f"❌ 无法读取图像: {img_path}") + continue + + print(f"\n📋 检测结果:") + for *xyxy, conf, cls_id in det: + x1, y1, x2, y2 = map(int, xyxy) + cls_name = CLASS_NAMES[int(cls_id)] + print(f" 类别: {cls_name}, 置信度: {conf:.3f}, 框: [{x1}, {y1}, {x2}, {y2}]") + cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2) + label = f"{cls_name} {conf:.2f}" + cv2.putText(img, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2) + + # 保存结果 + output_path = os.path.join(OUTPUT_FOLDER, f"result_{img_path.name}") + cv2.imwrite(output_path, img) + print(f"\n✅ 结果已保存: {output_path}") + + # 显示(可选) + if SHOW_IMAGE: + cv2.imshow("Detection", img) + if cv2.waitKey(1) & 0xFF == ord('q'): # 按 Q 退出 + break + + # 输出总耗时 + total_infer_time = time.time() - start_time + print(f"⏱️ 总处理时间: {total_infer_time * 1000:.1f}ms (推理+后处理)") + + # 结束 + total_elapsed = time.time() - total_start_time + print(f"\n🎉 批量推理完成!共处理 {len(img_paths)} 张图片,总耗时: {total_elapsed:.2f} 秒") + print( + f"🚀 平均每张: {total_elapsed / len(img_paths) * 1000:.1f} ms ({1 / (total_elapsed / len(img_paths)):.1f} FPS)") + + if SHOW_IMAGE: + cv2.destroyAllWindows() + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/ailai_pc/error_test.py b/ailai_pc/error_test.py new file mode 100644 index 0000000..636c1ca --- /dev/null +++ b/ailai_pc/error_test.py @@ -0,0 +1,122 @@ +import cv2 +import numpy as np +import os +from ultralytics import YOLO + +# ====================== 用户配置 ====================== +MODEL_PATH = '11.pt' +IMAGE_SOURCE_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/point2/train' # 验证集图片目录 +LABEL_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/point2/train' # 标签目录(与图片同名 .txt) +OUTPUT_DIR = './output_images' + + +IMG_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.bmp', '.tiff', '.tif', '.webp'} +os.makedirs(OUTPUT_DIR, exist_ok=True) + +# ====================== 可视化函数 ====================== +def draw_keypoints_on_image(image, kpts_xy, colors, label_prefix=''): + for j, (x, y) in enumerate(kpts_xy): + x, y = int(x), int(y) + cv2.circle(image, (x, y), 8, colors[j % len(colors)], -1) + cv2.putText(image, f'{label_prefix}{j+1}', (x + 10, y - 10), + cv2.FONT_HERSHEY_SIMPLEX, 1, colors[j % len(colors)], 2) + return image + +# ====================== 标签读取函数 ====================== +def load_keypoints_from_label(label_path, img_shape): + """ + 标签格式: + xc yc w h x1 y1 v1 x2 y2 v2 x3 y3 v3 x4 y4 v4 + 共 17 项:1 + 4 + 12 + """ + if not os.path.exists(label_path): + print(f"⚠️ 找不到标签文件: {label_path}") + return None + + H, W = img_shape[:2] + with open(label_path, 'r') as f: + line = f.readline().strip().split() + + if len(line) < 17: + print(f"⚠️ 标签长度不足: {label_path} ({len(line)}项)") + return None + + floats = [float(x) for x in line[5:]] # 跳过前5个(class + bbox) + coords = np.array(floats).reshape(-1, 3)[:, :2] # (4,2) + coords[:, 0] *= W + coords[:, 1] *= H + return coords + +# ====================== 主程序 ====================== +if __name__ == "__main__": + print("🚀 开始验证集关键点误差计算") + + model = YOLO(MODEL_PATH) + print(f"✅ 模型加载完成: {MODEL_PATH}") + + image_files = [ + f for f in os.listdir(IMAGE_SOURCE_DIR) + if os.path.splitext(f.lower())[1] in IMG_EXTENSIONS + ] + + if not image_files: + print("❌ 未找到图像文件") + exit(1) + + total_errors = [] + skipped = 0 + colors_gt = [(0, 255, 0), (0, 200, 0), (0, 150, 0), (0, 100, 0)] + colors_pred = [(0, 0, 255)] * 4 + + for img_filename in image_files: + img_path = os.path.join(IMAGE_SOURCE_DIR, img_filename) + label_path = os.path.join(LABEL_DIR, os.path.splitext(img_filename)[0] + '.txt') + + img = cv2.imread(img_path) + if img is None: + print(f"❌ 无法读取图像: {img_path}") + skipped += 1 + continue + + gt_kpts = load_keypoints_from_label(label_path, img.shape) + if gt_kpts is None or len(gt_kpts) < 4: + print(f"⚠️ 跳过 {img_filename}:标签点不足") + skipped += 1 + continue + + results = model(img, verbose=False) + if not results or results[0].keypoints is None or len(results[0].keypoints) == 0: + print(f"⚠️ {img_filename}: 无检测结果,跳过") + skipped += 1 + continue + + pred_kpts = results[0].keypoints.xy[0].cpu().numpy() + if pred_kpts.shape[0] != gt_kpts.shape[0]: + print(f"⚠️ {img_filename}: 点数不匹配 GT={len(gt_kpts)}, Pred={len(pred_kpts)},跳过") + skipped += 1 + continue + + # 计算误差 + errors = np.linalg.norm(pred_kpts - gt_kpts, axis=1) + mean_error = np.mean(errors) + total_errors.append(mean_error) + + print(f"📸 {img_filename}: 每点误差={np.round(errors, 2)} 像素, 平均误差={mean_error:.2f}px") + + # 可视化 + vis_img = img.copy() + vis_img = draw_keypoints_on_image(vis_img, gt_kpts, colors_gt, label_prefix='GT') + vis_img = draw_keypoints_on_image(vis_img, pred_kpts, colors_pred, label_prefix='P') + + save_path = os.path.join(OUTPUT_DIR, f"compare_{img_filename}") + cv2.imwrite(save_path, vis_img) + + # ====================== 结果统计 ====================== + print("\n======================") + if total_errors: + print(f"🎯 有效样本数: {len(total_errors)} 张") + print(f"🚫 跳过样本数: {skipped} 张") + print(f"📈 平均关键点误差: {np.mean(total_errors):.2f} 像素") + else: + print(f"⚠️ 所有样本均被跳过(跳过 {skipped} 张)") + print("======================") diff --git a/ailai_pc/jianzhi/best.pt b/ailai_pc/jianzhi/best.pt new file mode 100644 index 0000000..0774f1b Binary files /dev/null and b/ailai_pc/jianzhi/best.pt differ diff --git a/ailai_pc/jianzhi/jz.pt b/ailai_pc/jianzhi/jz.pt new file mode 100644 index 0000000..ca8b266 Binary files /dev/null and b/ailai_pc/jianzhi/jz.pt differ diff --git a/ailai_pc/jianzhi/jz.py b/ailai_pc/jianzhi/jz.py new file mode 100644 index 0000000..9993a92 --- /dev/null +++ b/ailai_pc/jianzhi/jz.py @@ -0,0 +1,95 @@ +import torch +import torch.nn as nn +from ultralytics import YOLO + +# ------------------- 核心剪枝函数 ------------------- +def prune_conv_bn(conv_bn, keep_idx): + """剪枝 ConvBNAct 模块的 Conv + BN""" + conv = conv_bn.conv + bn = conv_bn.bn + + # 跳过 depthwise + if conv.groups != 1: + return conv_bn + + # 剪枝 conv + new_conv = nn.Conv2d( + in_channels=conv.in_channels, + out_channels=len(keep_idx), + kernel_size=conv.kernel_size, + stride=conv.stride, + padding=conv.padding, + dilation=conv.dilation, + groups=conv.groups, + bias=(conv.bias is not None) + ).to(conv.weight.device) + new_conv.weight.data = conv.weight.data[keep_idx].clone() + if conv.bias is not None: + new_conv.bias.data = conv.bias.data[keep_idx].clone() + + # 剪枝 BN + if bn is not None: + new_bn = nn.BatchNorm2d(len(keep_idx)).to(bn.weight.device) + new_bn.weight.data = bn.weight.data[keep_idx].clone() + new_bn.bias.data = bn.bias.data[keep_idx].clone() + new_bn.running_mean = bn.running_mean[keep_idx].clone() + new_bn.running_var = bn.running_var[keep_idx].clone() + else: + new_bn = None + + # 替换模块 + conv_bn.conv = new_conv + conv_bn.bn = new_bn + return conv_bn + +def get_prune_idx(conv_bn, prune_ratio=0.3): + """根据 BN gamma 或 L2 norm 计算要保留的通道索引""" + conv = conv_bn.conv + bn = conv_bn.bn + if bn is not None: + gamma = bn.weight.data.abs() + else: + gamma = conv.weight.data.view(conv.out_channels, -1).norm(p=2, dim=1) + keep_num = max(int(conv.out_channels * (1 - prune_ratio)), 1) + _, idxs = torch.topk(gamma, keep_num) + return idxs + +def prune_yolov11_model(model, prune_ratio=0.3): + """遍历 YOLO 模型,剪枝所有 ConvBNAct""" + for name, m in model.named_modules(): + if m.__class__.__name__ == "ConvBNAct": + keep_idx = get_prune_idx(m, prune_ratio) + prune_conv_bn(m, keep_idx) + return model + +# ------------------- 主流程 ------------------- +def main(model_path="best.pt", save_path="yolov11_pruned_ts.pt", + prune_ratio=0.3, device="cuda"): + + # 加载 YOLO 模型 + model = YOLO(model_path).model + model.eval().to(device) + + # 剪枝 + print(f"✅ 开始剪枝,比例: {prune_ratio}") + model = prune_yolov11_model(model, prune_ratio) + print("✅ 剪枝完成") + + # 构造 dummy 输入 + example_inputs = torch.randn(1, 3, 640, 640).to(device) + + # TorchScript 跟踪 + print("🔹 开始 TorchScript 跟踪...") + traced_model = torch.jit.trace(model, example_inputs) + traced_model = torch.jit.optimize_for_inference(traced_model) + + # 保存 TorchScript 模型 + traced_model.save(save_path) + print(f"✅ TorchScript 剪枝模型已保存: {save_path}") + +if __name__ == "__main__": + main( + model_path="best.pt", + save_path="yolov11_pruned_ts.pt", + prune_ratio=0.3 + ) diff --git a/ailai_pc/jianzhi/yolov11_pruned.pt b/ailai_pc/jianzhi/yolov11_pruned.pt new file mode 100644 index 0000000..ab80e07 Binary files /dev/null and b/ailai_pc/jianzhi/yolov11_pruned.pt differ diff --git a/ailai_pc/last.pt b/ailai_pc/last.pt new file mode 100644 index 0000000..d6451db Binary files /dev/null and b/ailai_pc/last.pt differ diff --git a/ailai_pc/lingshi.py b/ailai_pc/lingshi.py new file mode 100644 index 0000000..ee78efb --- /dev/null +++ b/ailai_pc/lingshi.py @@ -0,0 +1,8 @@ +from ultralytics import YOLO +import torch +model = YOLO("/home/hx/开发/ailai_image_obb/ailai_pc/detect.pt") + +for name, module in model.model.named_modules(): + if isinstance(module, torch.nn.Conv2d): + w = module.weight + print(f"{name} -> min: {w.min().item():.3f}, max: {w.max().item():.3f}") diff --git a/ailai_pc/output_images/offset_1.jpg b/ailai_pc/output_images/offset_1.jpg deleted file mode 100644 index 5899316..0000000 Binary files a/ailai_pc/output_images/offset_1.jpg and /dev/null differ diff --git a/ailai_pc/point_test.py b/ailai_pc/point_test.py index 1793fcb..d950d3e 100644 --- a/ailai_pc/point_test.py +++ b/ailai_pc/point_test.py @@ -5,7 +5,7 @@ import os # ====================== 用户配置 ====================== MODEL_PATH = 'best.pt' -IMAGE_SOURCE_DIR = './train' # 👈 修改为你的图像文件夹路径 +IMAGE_SOURCE_DIR = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/point2/val' # 👈 修改为你的图像文件夹路径 OUTPUT_DIR = './output_images' # 保存结果的文件夹 # 支持的图像扩展名 diff --git a/ailai_pc/rename_file.py b/ailai_pc/rename_file.py index 57d4f5a..945c56b 100644 --- a/ailai_pc/rename_file.py +++ b/ailai_pc/rename_file.py @@ -2,10 +2,10 @@ import os import shutil # ================= 用户配置 ================= -FOLDER_PATH = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/point2' # 图片和 txt 所在文件夹 +FOLDER_PATH = '/media/hx/04e879fa-d697-4b02-ac7e-a4148876ebb0/dataset/20251020' # 图片和 txt 所在文件夹 IMG_EXT = '.jpg' TXT_EXT = '.txt' -START_NUM = 1 # 从 1 开始编号 +START_NUM = 571 # 从 1 开始编号 # ================= 获取文件列表 ================= files = os.listdir(FOLDER_PATH) diff --git a/ailai_pc/yolo_obb_dataset/11111.py b/ailai_pc/yolo_obb_dataset/11111.py new file mode 100644 index 0000000..7b84d79 --- /dev/null +++ b/ailai_pc/yolo_obb_dataset/11111.py @@ -0,0 +1,138 @@ +import os +import cv2 +import numpy as np +from rknnlite.api import RKNNLite + +# ====================== 配置 ====================== +MODEL_PATH = "yolo11.rknn" # RKNN 模型路径 +IMG_PATH = "11.jpg" # 待检测图片 +IMG_SIZE = (640, 640) # 模型输入尺寸 (w,h) +OBJ_THRESH = 0.001 # 目标置信度阈值 +NMS_THRESH = 0.45 # NMS 阈值 +CLASS_NAME = ["bag"] # 单类别 +OUTPUT_DIR = "./result" +os.makedirs(OUTPUT_DIR, exist_ok=True) + +# ====================== 工具函数 ====================== +def letterbox_resize(image, size, bg_color=114): + target_w, target_h = size + h, w = image.shape[:2] + scale = min(target_w / w, target_h / h) + new_w, new_h = int(w * scale), int(h * scale) + resized = cv2.resize(image, (new_w, new_h)) + canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8) + dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2 + canvas[dy:dy + new_h, dx:dx + new_w] = resized + return canvas, scale, dx, dy + +def dfl_numpy(position): + """Distribution Focal Loss 解析,纯 NumPy 版本""" + n, c, h, w = position.shape + p_num = 4 + mc = c // p_num + y = position.reshape(n, p_num, mc, h, w) + y = np.exp(y) / np.sum(np.exp(y), axis=2, keepdims=True) + acc = np.arange(mc).reshape(1,1,mc,1,1) + y = np.sum(y * acc, axis=2) + return y + +def box_process(position): + """解析网络输出的框坐标""" + grid_h, grid_w = position.shape[2:4] + col, row = np.meshgrid(np.arange(grid_w), np.arange(grid_h)) + col = col.reshape(1,1,grid_h,grid_w) + row = row.reshape(1,1,grid_h,grid_w) + grid = np.concatenate((col,row), axis=1) + stride = np.array([IMG_SIZE[1] // grid_h, IMG_SIZE[0] // grid_w]).reshape(1,2,1,1) + + position = dfl_numpy(position) + box_xy = grid + 0.5 - position[:,0:2,:,:] + box_xy2 = grid + 0.5 + position[:,2:4,:,:] + xyxy = np.concatenate((box_xy*stride, box_xy2*stride), axis=1) + return xyxy + +def filter_boxes(boxes, box_confidences, box_class_probs): + # sigmoid objectness + box_confidences = 1 / (1 + np.exp(-box_confidences)) + # softmax class probs + box_class_probs = np.exp(box_class_probs) + box_class_probs /= np.sum(box_class_probs, axis=-1, keepdims=True) + + box_confidences = box_confidences.reshape(-1) + class_max_score = np.max(box_class_probs, axis=-1) + classes = np.argmax(box_class_probs, axis=-1) + _pos = np.where(class_max_score * box_confidences >= OBJ_THRESH) + boxes = boxes[_pos] + classes = classes[_pos] + scores = (class_max_score * box_confidences)[_pos] + return boxes, classes, scores + +def post_process(outputs, scale, dx, dy): + boxes, classes_conf, scores = [], [], [] + branch_num = 3 + for i in range(branch_num): + boxes.append(box_process(outputs[i*3])) + classes_conf.append(outputs[i*3+1]) + scores.append(outputs[i*3+2]) # 使用真实 class 输出 + + def sp_flatten(x): + ch = x.shape[1] + x = x.transpose(0,2,3,1) + return x.reshape(-1,ch) + + boxes = np.concatenate([sp_flatten(b) for b in boxes]) + classes_conf = np.concatenate([sp_flatten(c) for c in classes_conf]) + scores = np.concatenate([sp_flatten(s) for s in scores]) + + boxes, classes, scores = filter_boxes(boxes, scores, classes_conf) + + if boxes.shape[0] == 0: + return None, None, None + + # 只保留置信度最高的框 + max_idx = np.argmax(scores) + boxes = boxes[max_idx:max_idx+1] + classes = classes[max_idx:max_idx+1] + scores = scores[max_idx:max_idx+1] + + # 映射回原图 + boxes[:, [0,2]] -= dx + boxes[:, [1,3]] -= dy + boxes /= scale + boxes = boxes.clip(min=0) + + return boxes, classes, scores + +def draw(image, boxes, scores, classes): + for box, score, cl in zip(boxes, scores, classes): + x1, y1, x2, y2 = [int(b) for b in box] + cv2.rectangle(image, (x1, y1), (x2, y2), (255,0,0), 2) + cv2.putText(image, f"{CLASS_NAME[cl]} {score:.3f}", (x1, y1-5), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,0,255), 2) + +# ====================== 主流程 ====================== +img = cv2.imread(IMG_PATH) +if img is None: + raise ValueError(f"Image {IMG_PATH} not found!") + +img_resized, scale, dx, dy = letterbox_resize(img, IMG_SIZE) +input_data = np.expand_dims(img_resized, 0) # 4 维输入 + +rknn = RKNNLite(verbose=False) +rknn.load_rknn(MODEL_PATH) +rknn.init_runtime() +outputs = rknn.inference([input_data]) +rknn.release() + +print("Outputs len:", len(outputs)) +for i, out in enumerate(outputs): + print(f"outputs[{i}].shape = {out.shape}, min={out.min()}, max={out.max()}, mean={out.mean():.4f}") + +boxes, classes, scores = post_process(outputs, scale, dx, dy) +if boxes is None: + print("Detected 0 boxes") +else: + draw(img, boxes, scores, classes) + result_path = os.path.join(OUTPUT_DIR, os.path.basename(IMG_PATH)) + cv2.imwrite(result_path, img) + print(f"Detection result saved to {result_path}") diff --git a/main/README.md b/main/README.md index 6062708..24dd210 100644 --- a/main/README.md +++ b/main/README.md @@ -51,7 +51,7 @@ result = calculate_offset_from_image("your_image_path.jpg", visualize=True) 示例 1: 仅获取偏移量(不画图) ```bash -from caculate_diff2.0 import calculate_offset_from_image +from calculate_diff2.0 import calculate_offset_from_image result = calculate_offset_from_image("11.jpg", visualize=False) if result['success']: @@ -63,7 +63,7 @@ else: 示例 2: 获取偏移量并保存可视化图 ```bash -from caculate_diff2.0 import calculate_offset_from_image +from calculate_diff2.0 import calculate_offset_from_image result = calculate_offset_from_image("11.jpg", visualize=True) @@ -76,7 +76,6 @@ result = calculate_offset_from_image("11.jpg", visualize=True) dy_mm: 垂直偏移(毫米) cx: 中心点 x 坐标(像素) cy: 中心点 y 坐标(像素) -<<<<<<< HEAD message: 错误信息或成功提示 ##该函数返回一个包含下列字段的字典2.0: @@ -93,6 +92,3 @@ result = calculate_offset_from_image("11.jpg", visualize=True) message: 错误信息或成功提示 -======= - message: 错误信息或成功提示 ->>>>>>> a6505573b9361ce4ab920ddc55f4bc6d86d7dfb4 diff --git a/main/caculate_diff(可用设备树版本,测试,不用做推理).py b/main/caculate_diff(可用设备树版本,测试,不用做推理).py new file mode 100644 index 0000000..7e37a3c --- /dev/null +++ b/main/caculate_diff(可用设备树版本,测试,不用做推理).py @@ -0,0 +1,256 @@ +# detect_fixed.py +import cv2 +import numpy as np +import os +import platform +from rknnlite.api import RKNNLite + +# ====================== 配置区 ====================== + +IMAGE_PATH = "11.jpg" # 测试图片 +MODEL_PATH = "point.rknn" +OUTPUT_DIR = "./output_rknn" +os.makedirs(OUTPUT_DIR, exist_ok=True) + +# 固定参考点 & 缩放因子 +FIXED_REF_POINT = (535, 605) +width_mm, width_px = 70.0, 42 +height_mm, height_px = 890.0, 507 +SCALE_X = width_mm / float(width_px) +SCALE_Y = height_mm / float(height_px) +print(f"[INFO] Scale factors: X={SCALE_X:.3f} mm/px, Y={SCALE_Y:.3f} mm/px") + +IMG_SIZE = (640, 640) + +# 设备树路径(用于自动识别平台) +DEVICE_COMPATIBLE_NODE = '/proc/device-tree/compatible' + + +def get_host(): + system = platform.system() + machine = platform.machine() + if system == 'Linux' and machine == 'aarch64': + try: + with open(DEVICE_COMPATIBLE_NODE, 'r') as f: + compatible = f.read() + if 'rk3588' in compatible: + return 'RK3588' + elif 'rk3576' in compatible: + return 'RK3576' + elif 'rk3562' in compatible: + return 'RK3562' + else: + return 'RK3566_RK3568' + except Exception as e: + print(f"Read device tree failed: {e}") + exit(-1) + else: + return f"{system}-{machine}" + + +def letterbox_resize(image, size, bg_color=114): + target_w, target_h = size + h, w = image.shape[:2] + scale = min(target_w / w, target_h / h) + new_w, new_h = int(w * scale), int(h * scale) + resized = cv2.resize(image, (new_w, new_h)) + canvas = np.full((target_h, target_w, 3), bg_color, dtype=np.uint8) + dx, dy = (target_w - new_w) // 2, (target_h - new_h) // 2 + canvas[dy:dy + new_h, dx:dx + new_w] = resized + return canvas, scale, dx, dy + + +def safe_sigmoid(x): + x = np.clip(x, -50, 50) + return 1.0 / (1.0 + np.exp(-x)) + + +def softmax(x): + x = x - np.max(x) + e = np.exp(x) + return e / e.sum() + + +def dfl_to_xywh(loc, grid_x, grid_y, stride): + xywh_ = np.zeros(4) + xywh = np.zeros(4) + for i in range(4): + l = loc[i * 16:(i + 1) * 16] + l = softmax(l) + xywh_[i] = sum(j * l[j] for j in range(16)) + + xywh_[0] = (grid_x + 0.5) - xywh_[0] + xywh_[1] = (grid_y + 0.5) - xywh_[1] + xywh_[2] = (grid_x + 0.5) + xywh_[2] + xywh_[3] = (grid_y + 0.5) + xywh_[3] + + xywh[0] = ((xywh_[0] + xywh_[2]) / 2) * stride + xywh[1] = ((xywh_[1] + xywh_[3]) / 2) * stride + xywh[2] = (xywh_[2] - xywh_[0]) * stride + xywh[3] = (xywh_[3] - xywh_[1]) * stride + + xywh[0] -= xywh[2] / 2 + xywh[1] -= xywh[3] / 2 + return xywh + + +def parse_pose_outputs(outputs, conf_threshold=0.5, dx=0, dy=0, scale=1.0): + boxes = [] + obj_confs = [] + class_ids = [] + + for idx in range(3): # det head + det = np.array(outputs[idx])[0] + C, H, W = det.shape + num_classes = C - 64 + stride = 640 // H + + for h in range(H): + for w in range(W): + for c in range(num_classes): + conf = safe_sigmoid(det[64 + c, h, w]) + if conf >= conf_threshold: + loc = det[:64, h, w].astype(np.float32) + xywh = dfl_to_xywh(loc, w, h, stride) + boxes.append(xywh) + obj_confs.append(conf) + class_ids.append(c) + + if not obj_confs: + best_box = np.array([0, 0, 0, 0]) + class_id = -1 + obj_conf = 0.0 + else: + max_idx = np.argmax(obj_confs) + best_box = boxes[max_idx] + class_id = class_ids[max_idx] + obj_conf = obj_confs[max_idx] + + x, y, w, h = best_box + x = (x - dx) / scale + y = (y - dy) / scale + w /= scale + h /= scale + best_box = [x, y, w, h] + + kpt_output = np.array(outputs[3])[0] + confs = kpt_output[:, 2, :] + best_anchor_idx = np.argmax(np.mean(confs, axis=0)) + kpt_data = kpt_output[:, :, best_anchor_idx] + + keypoints = [] + for i in range(kpt_data.shape[0]): + x_img, y_img, vis_raw = kpt_data[i] + vis_prob = safe_sigmoid(vis_raw) + x_orig = (x_img - dx) / scale + y_orig = (y_img - dy) / scale + keypoints.append([x_orig, y_orig, vis_prob]) + + return np.array(keypoints), class_id, obj_conf, best_box + + +def compute_offset(keypoints, fixed_point, scale_x, scale_y): + if len(keypoints) < 2: return None + p1, p2 = keypoints[0], keypoints[1] + cx = (p1[0] + p2[0]) / 2 + cy = (p1[1] + p2[1]) / 2 + dx_mm = (cx - fixed_point[0]) * scale_x + dy_mm = (cy - fixed_point[1]) * scale_y + return cx, cy, dx_mm, dy_mm + + +def visualize_result(image, keypoints, bbox, fixed_point, offset_info, save_path): + vis = image.copy() + fx, fy = map(int, fixed_point) + cx, cy, dx_mm, dy_mm = offset_info + + for i, (x, y, conf) in enumerate(keypoints[:2]): + if conf > 0.5: + color = (0, 0, 255) if i == 0 else (0, 255, 255) + cv2.circle(vis, (int(x), int(y)), 6, color, -1) + if len(keypoints) >= 2: + cv2.line(vis, (int(keypoints[0][0]), int(keypoints[0][1])), + (int(keypoints[1][0]), int(keypoints[1][1])), + (0, 255, 0), 2) + + x, y, w, h = bbox + cv2.rectangle(vis, (int(x), int(y)), (int(x + w), int(y + h)), (255, 0, 0), 2) + cv2.circle(vis, (int(cx), int(cy)), 10, (0, 255, 0), 3) + cv2.circle(vis, (fx, fy), 12, (255, 255, 0), 3) + cv2.arrowedLine(vis, (fx, fy), (int(cx), int(cy)), (255, 255, 0), 2, tipLength=0.05) + cv2.putText(vis, f"DeltaX={dx_mm:+.1f}mm", (fx + 30, fy - 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2) + cv2.putText(vis, f"DeltaY={dy_mm:+.1f}mm", (fx + 30, fy + 30), + cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2) + cv2.imwrite(save_path, vis) + + +def main(): + host_name = get_host() + print(f"[INFO] Running on: {host_name}") + + rknn = RKNNLite() + + # ---- Load Model ---- + ret = rknn.load_rknn(MODEL_PATH) + if ret != 0: + print("❌ Failed to load RKNN model!") + exit(ret) + print("✅ Model loaded successfully.") + + # ---- Init Runtime ---- + if host_name in ['RK3576', 'RK3588']: + ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0) + else: + ret = rknn.init_runtime() + if ret != 0: + print("❌ Init runtime failed!") + rknn.release() + exit(ret) + print("✅ Runtime initialized.") + + # ---- Preprocess ---- + ori_img = cv2.imread(IMAGE_PATH) + if ori_img is None: + print(f"❌ Cannot read image: {IMAGE_PATH}") + return + + img_resized, scale, dx, dy = letterbox_resize(ori_img, IMG_SIZE) + input_tensor = np.expand_dims(img_resized[..., ::-1], 0).astype(np.uint8) # RGB + + # ---- Inference ---- + print("🔍 Starting inference...") + outputs = rknn.inference(inputs=[input_tensor]) + print("✅ Inference completed.") + + # ---- Post-process ---- + try: + keypoints, cls_id, obj_conf, bbox = parse_pose_outputs( + outputs, dx=dx, dy=dy, scale=scale) + + offset_info = compute_offset(keypoints, FIXED_REF_POINT, SCALE_X, SCALE_Y) + if offset_info is None: + print("⚠️ Not enough keypoints detected.") + return + + cx, cy, dx_mm, dy_mm = offset_info + vis_save_path = os.path.join(OUTPUT_DIR, f"result_{os.path.basename(IMAGE_PATH)}") + visualize_result(ori_img, keypoints, bbox, FIXED_REF_POINT, offset_info, vis_save_path) + + print(f"\n🎯 Detection Result:") + print(f"Center: ({cx:.1f}, {cy:.1f})") + print(f"Offset: ΔX={dx_mm:+.2f}mm, ΔY={dy_mm:+.2f}mm") + print(f"Class: {cls_id}, Confidence: {obj_conf:.3f}") + print(f"Saved result to: {vis_save_path}") + + except Exception as e: + print(f"❌ Post-processing error: {e}") + import traceback + traceback.print_exc() + + finally: + rknn.release() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/main/caculate_diff2.0.py b/main/calculate_diff2.0.py similarity index 99% rename from main/caculate_diff2.0.py rename to main/calculate_diff2.0.py index 549c8ed..cb45c7a 100644 --- a/main/caculate_diff2.0.py +++ b/main/calculate_diff2.0.py @@ -4,6 +4,7 @@ import os from rknnlite.api import RKNNLite # ====================== 配置区 ====================== + MODEL_PATH = "point.rknn" OUTPUT_DIR = "./output_rknn" os.makedirs(OUTPUT_DIR, exist_ok=True) diff --git a/main/point.rknn b/main/point.rknn index 1ba05e9..64d89cc 100644 Binary files a/main/point.rknn and b/main/point.rknn differ