#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
# @Time    : 2025/3/18 15:29
# @Author  : hjw
# @File    : yolov8_pt_pose.py
'''
import os.path
import random
import cv2
import numpy as np
import torch
import time
from ultralytics.nn.autobackend import AutoBackend
from ultralytics.utils import ops


class yolov8_pose:
    """Wrapper around an Ultralytics pose model: preprocessing, inference and drawing."""

    def __init__(self, weights, cuda, conf_thres=0.45, iou_thres=0.45) -> None:
        """Load the model and prepare thresholds, class colours and the keypoint palette.

        Args:
            weights: Path to the weights file, e.g. r'./runs/pose/train25/weights/last.pt'.
            cuda: Device string passed to ``torch.device``, e.g. 'cpu'.
            conf_thres: Confidence threshold forwarded to non-max suppression.
            iou_thres: IoU threshold forwarded to non-max suppression.
        """
        self.imgsz = 640
        self.device = cuda
        self.model = AutoBackend(weights, device=torch.device(cuda))
        self.model.eval()
        self.names = self.model.names
        self.half = False
        self.conf = conf_thres
        self.iou = iou_thres

        # One random BGR colour per class name, plus a fixed white colour for label text.
        self.color = {"font": (255, 255, 255)}
        self.color.update(
            {self.names[i]: (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
             for i in range(len(self.names))})

        # Reference tables for a 17-keypoint skeleton, kept for when such a model is used:
        # self.skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8],
        #                  [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]
        # pose_palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102], [230, 230, 0], [255, 153, 255],
        #                          [153, 204, 255], [255, 102, 255], [255, 51, 255], [102, 178, 255], [51, 153, 255],
        #                          [255, 153, 153], [255, 102, 102], [255, 51, 51], [153, 255, 153], [102, 255, 102],
        #                          [51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], [255, 255, 255]], dtype=np.uint8)
        # self.kpt_color = pose_palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]
        # self.limb_color = pose_palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]]

        # Active configuration: 4 keypoints joined in a chain (skeleton indices are 1-based).
        self.skeleton = [[1, 2], [2, 3], [3, 4]]
        pose_palette = np.array([[255, 0, 0], [255, 153, 51], [255, 3, 102], [0, 230, 0]], dtype=np.uint8)
        self.kpt_color = pose_palette[[0, 1, 2, 3]]
        self.limb_color = pose_palette[[0, 1, 2, 3]]
        # print(len(self.skeleton ))
        # print(len(pose_palette))
        # print(len(self.kpt_color))
        # print(len(self.limb_color))

    def model_inference(self, img_src):
        """Run pose detection on one image and draw the accepted keypoints onto it.

        ``img_src`` is modified in place: a filled circle is drawn for every
        keypoint that passes the filters below.

        Args:
            img_src: Source image as an HWC numpy array (converted BGR -> RGB
                during preprocessing).

        Returns:
            A tuple ``(True, point_xy, name_list, score_list)`` where the three
            lists are aligned per detection: ``point_xy[j]`` is the list of
            ``[x, y]`` integer keypoints kept for detection ``j``,
            ``name_list[j]`` its class name and ``score_list[j]`` its score.
        """
        img = self.precess_image(img_src, self.imgsz, self.half, self.device)

        # Inference only: no autograd graph is needed for the forward pass.
        with torch.no_grad():
            preds = self.model(img)
        det = ops.non_max_suppression(preds, self.conf, self.iou, classes=None, agnostic=False,
                                      max_det=300, nc=len(self.names))

        # Initialised once so all three outputs stay aligned across every batch item.
        point_xy = []
        name_list = []
        score_list = []

        # Keypoints are rescaled to the source image below, so the border test
        # must use the source image size rather than the network input size.
        img_h, img_w = img_src.shape[:2]
        radius = 5
        palette_size = len(self.kpt_color)

        for pred in det:
            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], img_src.shape)
            pred_bbox = pred[:, :6].cpu().detach().numpy()
            pred_kpts = pred[:, 6:].view(len(pred), *self.model.kpt_shape) if len(pred) else pred[:, 6:]
            pred_kpts = ops.scale_coords(img.shape[2:], pred_kpts, img_src.shape)
            pred_kpts = pred_kpts.cpu().detach().numpy()

            for kpts, bbox in zip(pred_kpts, pred_bbox):
                score = bbox[4]
                # The class id arrives as a numpy float; cast before using it as a key/index.
                name = self.names[int(bbox[5])]

                xy = []
                for kpt_idx, k in enumerate(kpts):
                    x_coord, y_coord = k[0], k[1]
                    # Skip keypoints lying exactly on a multiple of the image width/height
                    # (i.e. at 0 or on the far border).
                    if x_coord % img_w == 0 or y_coord % img_h == 0:
                        continue
                    # Skip low-confidence keypoints when a confidence channel is present.
                    if len(k) == 3 and k[2] < 0.5:
                        continue
                    # Wrap the palette index so models with more keypoints than
                    # palette entries do not raise IndexError.
                    color_k = [int(c) for c in self.kpt_color[kpt_idx % palette_size]]
                    xy.append([int(x_coord), int(y_coord)])
                    cv2.circle(img_src, (int(x_coord), int(y_coord)), radius, color_k, -1,
                               lineType=cv2.LINE_AA)

                point_xy.append(xy)
                name_list.append(name)
                score_list.append(score)

        return True, point_xy, name_list, score_list

    def draw_box(self, img_src, box, conf, cls_name, lw, sf, tf):
        """Draw a bounding box with a filled label tag onto ``img_src`` (in place).

        Args:
            img_src: Image to draw on.
            box: Sequence whose first four items are x1, y1, x2, y2.
            conf: Score shown in the label text.
            cls_name: Class name; also the key used to look up the box colour.
            lw: Line width of the rectangle.
            sf: Font scale used for the label.
            tf: Font thickness used when measuring the label size.
        """
        color = self.color[cls_name]
        label = f'{cls_name} {conf}'
        p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
        # Draw the bounding rectangle.
        cv2.rectangle(img_src, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
        # Text width and height.
        w, h = cv2.getTextSize(label, 0, fontScale=sf, thickness=tf)[0]
        # Place the label above the box when it fits, otherwise inside the box.
        outside = box[1] - h - 3 >= 0
        p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
        # Draw the filled background for the label.
        cv2.rectangle(img_src, p1, p2, color, -1, cv2.LINE_AA)
        # Draw the label text.
        cv2.putText(img_src, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, sf,
                    self.color["font"], thickness=2, lineType=cv2.LINE_AA)

    def draw_kpts(self, img_src, kpts, box, score, name, lw, sf, tf, shape=(640, 640), radius=5, kpt_line=True):
        """Draw keypoints, skeleton limbs and (if any limb was drawn) the box onto ``img_src``.

        Args:
            img_src: Image to draw on (modified in place).
            kpts: 2-D array of keypoints; each row is ``(x, y)`` or ``(x, y, conf)``.
            box, score, name, lw, sf, tf: Forwarded to ``draw_box``.
            shape: ``(height, width)`` used for the border test; keypoints whose
                coordinate is an exact multiple of it are skipped.
            radius: Radius of the keypoint circles.
            kpt_line: Whether to draw limbs; only honoured for 4-keypoint input.
        """
        limb_drawn = False
        nkpt, ndim = kpts.shape
        is_pose = nkpt == 4 and ndim in {2, 3}
        kpt_line &= is_pose  # limbs are only drawn for the 4-keypoint layout in self.skeleton

        palette_size = len(self.kpt_color)
        for i, k in enumerate(kpts):
            x_coord, y_coord = k[0], k[1]
            if x_coord % shape[1] == 0 or y_coord % shape[0] == 0:
                continue
            if len(k) == 3 and k[2] < 0.5:
                continue
            # Wrap the palette index so extra keypoints do not raise IndexError.
            color_k = [int(x) for x in self.kpt_color[i % palette_size]]
            cv2.circle(img_src, (int(x_coord), int(y_coord)), radius, color_k, -1, lineType=cv2.LINE_AA)

        if kpt_line:
            ndim = kpts.shape[-1]
            for i, sk in enumerate(self.skeleton):
                # Skeleton entries are 1-based keypoint indices.
                pos1 = (int(kpts[(sk[0] - 1), 0]), int(kpts[(sk[0] - 1), 1]))
                pos2 = (int(kpts[(sk[1] - 1), 0]), int(kpts[(sk[1] - 1), 1]))
                if ndim == 3:
                    conf1 = kpts[(sk[0] - 1), 2]
                    conf2 = kpts[(sk[1] - 1), 2]
                    if conf1 < 0.5 or conf2 < 0.5:
                        continue
                if pos1[0] % shape[1] == 0 or pos1[1] % shape[0] == 0 or pos1[0] < 0 or pos1[1] < 0:
                    continue
                if pos2[0] % shape[1] == 0 or pos2[1] % shape[0] == 0 or pos2[0] < 0 or pos2[1] < 0:
                    continue
                cv2.line(img_src, pos1, pos2, [int(x) for x in self.limb_color[i]], thickness=2,
                         lineType=cv2.LINE_AA)
                limb_drawn = True

        # The box is only drawn when at least one limb was drawn.
        if limb_drawn:
            self.draw_box(img_src, box, score, name, lw, sf, tf)

    @staticmethod
    def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), scaleup=True, stride=32):
        """Resize ``im`` keeping aspect ratio and pad it to a stride-multiple rectangle.

        The padding needed to reach ``new_shape`` is always reduced modulo
        ``stride``, so the result is the smallest stride-aligned rectangle, not
        necessarily ``new_shape`` itself.

        Args:
            im: Input image, HWC.
            new_shape: Target ``(height, width)`` or a single int for a square.
            color: Border colour used for padding.
            scaleup: When False the image is only ever scaled down.
            stride: Padding is taken modulo this value.

        Returns:
            ``(image, (r, r), (dw, dh))`` where ``r`` is the scale ratio and
            ``dw``/``dh`` are the per-side horizontal/vertical paddings.
        """
        shape = im.shape[:2]  # current shape [height, width]
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not scaleup:  # only scale down, do not scale up
            r = min(r, 1.0)

        # Compute padding
        ratio = r, r  # width, height ratios
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
        # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
        dw /= 2  # divide padding into 2 sides
        dh /= 2

        if shape[::-1] != new_unpad:  # resize
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
        # The -0.1 / +0.1 offsets split an odd total padding between the two sides.
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
        return im, ratio, (dw, dh)

    def precess_image(self, img_src, img_size, half, device):
        """Turn a source image into a normalised NCHW tensor on ``device``.

        Args:
            img_src: Source image, HWC.
            img_size: Target size handed to ``letterbox``.
            half: Convert to fp16 when True, otherwise fp32.
            device: Device the tensor is moved to.

        Returns:
            A float tensor scaled to [0.0, 1.0] with a leading batch dimension.
        """
        # Padded resize
        img = self.letterbox(img_src, img_size)[0]
        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(device)

        img = img.half() if half else img.float()  # uint8 to fp16/32
        img = img / 255  # 0 - 255 to 0.0 - 1.0
        if len(img.shape) == 3:
            img = img[None]  # expand for batch dim
        return img


# Example usage (kept commented out).
# NOTE(review): `infer` is not defined on the class as shown in this file;
# confirm the intended entry point before enabling this.
# if __name__ == '__main__':
#     weights = r'./runs/pose/train25/weights/last.pt'
#     cuda = 'cpu'
#     save_path = "./img_test"
#     start = time.time()
#     if not os.path.exists(save_path):
#         os.mkdir(save_path)
#
#     model = yolov8_pose(weights, cuda, 0.45, 0.45)
#
#     img_path = r'./1106-08-pe-518.png'
#     model.infer(img_path, save_path)
#     end = time.time()
#     print('inference time:', end - start)