From e637064f01cf29505e819e0974c2378e6c340c33 Mon Sep 17 00:00:00 2001 From: hjw <1576345902@qq.com> Date: Thu, 29 Aug 2024 08:43:46 +0000 Subject: [PATCH] =?UTF-8?q?=E5=88=A0=E9=99=A4=20Vision/yolov8=5Fpt=5Fseg.p?= =?UTF-8?q?y?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Vision/yolov8_pt_seg.py | 365 ---------------------------------------- 1 file changed, 365 deletions(-) delete mode 100644 Vision/yolov8_pt_seg.py diff --git a/Vision/yolov8_pt_seg.py b/Vision/yolov8_pt_seg.py deleted file mode 100644 index dd91001..0000000 --- a/Vision/yolov8_pt_seg.py +++ /dev/null @@ -1,365 +0,0 @@ - -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -''' -@Project -> File :yolov8_segment.py -@IDE :PyCharm -@Author :hjw -@Version : 1.0.0 -@Date :2024/8/20 9:25 -@Function : -''' - -# yolov8 pt模型,实例分割推理 -import cv2 -import time -import numpy as np -import torch, torchvision -import torch.nn.functional as F - - - -def load_model(model_path, device): - model = torch.load(model_path, map_location=device) - category_list = model.get('CLASSES', model.get('model').names) - model = (model.get('ema') or model['model']).float() # FP32 model - model.__setattr__('CLASSES', category_list) - model.fuse().eval() - #model = model.cuda() - return model - - -def data_preprocess(model, img, img_scale, device): - stride, auto = 32, True - stride = max(int(model.stride.max()), 32) - img = letterbox(img, new_shape=img_scale, stride=stride, auto=auto)[0] # padded resize - img = np.ascontiguousarray(img.transpose((2, 0, 1))[::-1]) # HWC to CHW, BGR to RGB,contiguous - #img = torch.from_numpy(img) # ndarray to tensor - img = torch.from_numpy(img).to(device) - #img = torch.from_numpy(img) - img = img.float() # uint8 to fp32 - img /= 255 # 0 - 255 to 0.0 - 1.0 - if len(img.shape) == 3: - img = img[None] # expand for batch dim - return img - - -def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): - # Resize and pad image while meeting stride-multiple constraints - shape = im.shape[:2] # current shape [height, width] - if isinstance(new_shape, int): - new_shape = (new_shape, new_shape) - - # Scale ratio (new / old) - r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) - if not scaleup: # only scale down, do not scale up (for better val mAP) - r = min(r, 1.0) - - # Compute padding - ratio = r, r # width, height ratios - new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) - dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding - if auto: # minimum rectangle - dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding - elif scaleFill: # stretch - dw, dh = 0.0, 0.0 - new_unpad = (new_shape[1], new_shape[0]) - ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios - - dw /= 2 # divide padding into 2 sides - dh /= 2 - - if shape[::-1] != new_unpad: # resize - im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) - top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) - left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) - im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border - return im, ratio, (dw, dh) - - -def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, - labels=(), max_det=300, nc=0, max_time_img=0.05, max_nms=30000, max_wh=7680, ): - # Checks - assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' - assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' - if isinstance(prediction, (list, tuple)): # YOLOv8 model in validation model, output = (inference_out, loss_out) - prediction = prediction[0] # select only inference output - - device = prediction.device - mps = 'mps' in device.type # Apple MPS - if mps: # MPS not fully supported yet, convert tensors to CPU before NMS - prediction = prediction.cpu() - bs = prediction.shape[0] # batch size - nc = nc or (prediction.shape[1] - 4) # number of classes - nm = prediction.shape[1] - nc - 4 - mi = 4 + nc # mask start index - xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates - - # Settings - # min_wh = 2 # (pixels) minimum box width and height - time_limit = 0.5 + max_time_img * bs # seconds to quit after - multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) - - prediction = prediction.transpose(-1, -2) # shape(1,84,6300) to shape(1,6300,84) - prediction[..., :4] = xywh2xyxy(prediction[..., :4]) # xywh to xyxy - - t = time.time() - output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs - for xi, x in enumerate(prediction): # image index, image inference - # Apply constraints - # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height - x = x[xc[xi]] # confidence - - # Cat apriori labels if autolabelling - if labels and len(labels[xi]): - lb = labels[xi] - v = torch.zeros((len(lb), nc + nm + 4), device=x.device) - v[:, :4] = xywh2xyxy(lb[:, 1:5]) # box - v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls - x = torch.cat((x, v), 0) - - # If none remain process next image - if not x.shape[0]: - continue - - # Detections matrix nx6 (xyxy, conf, cls) - box, cls, mask = x.split((4, nc, nm), 1) - - if multi_label: - i, j = torch.where(cls > conf_thres) - x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1) - else: # best class only - conf, j = cls.max(1, keepdim=True) - x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres] - - # Filter by class - if classes is not None: - x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] - - # Check shape - n = x.shape[0] # number of boxes - if not n: # no boxes - continue - if n > max_nms: # excess boxes - x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence and remove excess boxes - - # Batched NMS - c = x[:, 5:6] * (0 if agnostic else max_wh) # classes - boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores - i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS - i = i[:max_det] # limit detections - - output[xi] = x[i] - if mps: - output[xi] = output[xi].to(device) - if (time.time() - t) > time_limit: - print(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded') - break # time limit exceeded - return output - - -def xywh2xyxy(x): - """ - Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the - top-left corner and (x2, y2) is the bottom-right corner. - Args: - x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format. - Returns: - y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format. - """ - assert x.shape[-1] == 4, f'input shape last dimension expected 4 but input shape is {x.shape}' - y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy - dw = x[..., 2] / 2 # half-width - dh = x[..., 3] / 2 # half-height - y[..., 0] = x[..., 0] - dw # top left x - y[..., 1] = x[..., 1] - dh # top left y - y[..., 2] = x[..., 0] + dw # bottom right x - y[..., 3] = x[..., 1] + dh # bottom right y - return y - - -def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True): - """ - Rescales bounding boxes (in the format of xyxy) from the shape of the image they were originally specified in - (img1_shape) to the shape of a different image (img0_shape). - Args: - img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width). - boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2) - img0_shape (tuple): the shape of the target image, in the format of (height, width). - ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be - calculated based on the size difference between the two images. - padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular - rescaling. - Returns: - boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2) - """ - if ratio_pad is None: # calculate from img0_shape - gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new - pad = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1), round( - (img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1) # wh padding - else: - gain = ratio_pad[0][0] - pad = ratio_pad[1] - - if padding: - boxes[..., [0, 2]] -= pad[0] # x padding - boxes[..., [1, 3]] -= pad[1] # y padding - boxes[..., :4] /= gain - clip_boxes(boxes, img0_shape) - return boxes - - -def clip_boxes(boxes, shape): - """ - Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape. - - Args: - boxes (torch.Tensor): the bounding boxes to clip - shape (tuple): the shape of the image - """ - if isinstance(boxes, torch.Tensor): # faster individually - boxes[..., 0].clamp_(0, shape[1]) # x1 - boxes[..., 1].clamp_(0, shape[0]) # y1 - boxes[..., 2].clamp_(0, shape[1]) # x2 - boxes[..., 3].clamp_(0, shape[0]) # y2 - else: # np.array (faster grouped) - boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2 - boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2 - - -def process_mask(protos, masks_in, bboxes, shape, ori_shape): - """ - Crop after upsample. - proto_out: [mask_dim, mask_h, mask_w] - out_masks: [n, mask_dim], n is number of masks after nms - bboxes: [n, 4], n is number of masks after nms - shape:input_image_size, (h, w) - - return: h, w, n - """ - # mask转换成自定义尺寸 - c, mh, mw = protos.shape # CHW - masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) - masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW - # mask转换成原图尺寸 - gain = min(shape[0] / ori_shape[0], shape[1] / ori_shape[1]) # gain = old / new - pad = (shape[1] - ori_shape[1] * gain) / 2, (shape[0] - ori_shape[0] * gain) / 2 # wh padding - top, left = int(pad[1]), int(pad[0]) # y, x - bottom, right = int(shape[0] - pad[1]), int(shape[1] - pad[0]) - if len(masks.shape) < 2: - raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') - masks = masks[:, top:bottom, left:right] - masks = F.interpolate(masks[None], ori_shape, mode='bilinear', align_corners=False)[0] # CHW - # 裁去box以外的图像 - crop_masks = [] - for i, mask in enumerate(masks): - mask = mask[int(bboxes[i][1]):int(bboxes[i][3]), int(bboxes[i][0]):int(bboxes[i][2])] - crop_masks.append(mask.gt_(0.5)) - return crop_masks - - -def plot_result(det_cpu, dst_img, masks, category_names): - circle_max_contour = [] - concrete_max_contour = [] - for i, item in enumerate(det_cpu): - # rand_color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) - # 画box - box_x1, box_y1, box_x2, box_y2 = item[0:4].astype(np.int32) - label = category_names[int(item[5])] - rand_color = (0, 255, 255) - #cv2.rectangle(dst_img, (box_x1, box_y1), (box_x2, box_y2), color=rand_color, thickness=2) - score = item[4] - org = (int((box_x1+box_x2)/2), int((box_y1+box_y2)/2)) - text = '{}|{:.2f}'.format(label, score) - cv2.putText(dst_img, text, org=org, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.8, color=rand_color, thickness=2) - # 画mask - #mask = masks[i].cpu().numpy().astype(int) - mask = masks[i].cpu().data.numpy().astype(int) - #mask = masks[i].numpy().astype(int) - bbox_image = dst_img[box_y1:box_y2, box_x1:box_x2] - h, w = box_y2 - box_y1, box_x2 - box_x1 - mask_colored = np.zeros((h, w, 3), dtype=np.uint8) - mask_colored[np.where(mask)] = rand_color - ################################## - imgray = cv2.cvtColor(mask_colored, cv2.COLOR_BGR2GRAY) - # cv2.imshow('mask',imgray) - # cv2.waitKey(1) - # 2、二进制图像 - ret, binary = cv2.threshold(imgray, 10, 255, 0) - # 阈值 二进制图像 - # cv2.imshow('bin',binary) - # cv2.waitKey(1) - contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) - max_contour = None - max_perimeter = 0 - for contour in contours: - perimeter = cv2.arcLength(contour, True) - if perimeter > max_perimeter: - max_perimeter = perimeter - max_contour = contour - rect = cv2.minAreaRect(max_contour) - # cv2.boxPoints可以将轮廓点转换为四个角点坐标 - box = cv2.boxPoints(rect) - # 这一步不影响后面的画图,但是可以保证四个角点坐标为顺时针 - startidx = box.sum(axis=1).argmin() - box = np.roll(box, 4 - startidx, 0) - # 在原图上画出预测的外接矩形 - box = box.reshape((-1, 1, 2)).astype(np.int32) - box = box + [[[box_x1, box_y1]], [[box_x1, box_y1]], [[box_x1, box_y1]], [[box_x1, box_y1]]] - cv2.polylines(dst_img, [box], True, (0, 255, 0), 2) - - return dst_img - # cv2.imwrite('rs.jpg', dst_img) - - -class yolov8_segment(): - def __init__(self): - super(yolov8_segment, self).__init__() - - - def load_model(self, model_path, device): - self.model = load_model(model_path, device) - self.device = device - - def model_inference(self, frame, upd_arr): - img = data_preprocess(self.model, frame, [640, 640], self.device) - - # 推理 - ori_img = frame.copy() - result = self.model(img, augment=False) - preds = result[0] - proto = result[1][-1] - # NMS - det = non_max_suppression(preds, conf_thres=0.25, iou_thres=0.3, nc=len(self.model.CLASSES))[0] - if det.shape[0] != 0: - # bbox还原至原图尺寸 - det[:, :4] = scale_boxes(img.shape[2:], det[:, :4], ori_img.shape) - # mask转换成原图尺寸并做裁剪 - masks = process_mask(proto[0], det[:, 6:], det[:, :4], img.shape[2:], ori_img.shape[0:2]) - category_names = self.model.CLASSES - # 画图 - # result_frame = plot_result(det.cpu().data.numpy(), ori_img, masks, category_names) - return 1 , det.cpu().data.numpy(), ori_img, masks, category_names - else: - return 0 , None, None, None, None - - def clear(self): - del self.model - -# model = yolov8_segment() -# model.load_model('./pt_model/yolov8n-seg.pt','cpu') -# cap = cv2.VideoCapture(1) -# while True: -# # count_file = len(os.listdir('E:\\A_panckg\\cv_sdk_discharge\\video_save')) # 数量 -# ret, frame = cap.read() -# if ret: -# frame_save_count = 1000 -# frame = cv2.resize(frame, (1280, 720)) -# img = model.model_inference(frame, 0) -# cv2.imshow("imgrr", img) -# cv2.waitKey(1) -# #videoWriter(img) - - - -