Files
AutoControlSystem-git/Vision/yolo/yolov8_pt_seg.py
2024-08-29 08:58:08 +00:00

366 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project -> File yolov8_segment.py
@IDE PyCharm
@Author hjw
@Version : 1.0.0
@Date 2024/8/20 9:25
@Function
'''
# yolov8 pt模型实例分割推理
import cv2
import time
import numpy as np
import torch, torchvision
import torch.nn.functional as F
def load_model(model_path, device):
model = torch.load(model_path, map_location=device)
category_list = model.get('CLASSES', model.get('model').names)
model = (model.get('ema') or model['model']).float() # FP32 model
model.__setattr__('CLASSES', category_list)
model.fuse().eval()
#model = model.cuda()
return model
def data_preprocess(model, img, img_scale, device):
stride, auto = 32, True
stride = max(int(model.stride.max()), 32)
img = letterbox(img, new_shape=img_scale, stride=stride, auto=auto)[0] # padded resize
img = np.ascontiguousarray(img.transpose((2, 0, 1))[::-1]) # HWC to CHW, BGR to RGB,contiguous
#img = torch.from_numpy(img) # ndarray to tensor
img = torch.from_numpy(img).to(device)
#img = torch.from_numpy(img)
img = img.float() # uint8 to fp32
img /= 255 # 0 - 255 to 0.0 - 1.0
if len(img.shape) == 3:
img = img[None] # expand for batch dim
return img
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
# Resize and pad image while meeting stride-multiple constraints
shape = im.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better val mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return im, ratio, (dw, dh)
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
labels=(), max_det=300, nc=0, max_time_img=0.05, max_nms=30000, max_wh=7680, ):
# Checks
assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
if isinstance(prediction, (list, tuple)): # YOLOv8 model in validation model, output = (inference_out, loss_out)
prediction = prediction[0] # select only inference output
device = prediction.device
mps = 'mps' in device.type # Apple MPS
if mps: # MPS not fully supported yet, convert tensors to CPU before NMS
prediction = prediction.cpu()
bs = prediction.shape[0] # batch size
nc = nc or (prediction.shape[1] - 4) # number of classes
nm = prediction.shape[1] - nc - 4
mi = 4 + nc # mask start index
xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates
# Settings
# min_wh = 2 # (pixels) minimum box width and height
time_limit = 0.5 + max_time_img * bs # seconds to quit after
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
prediction = prediction.transpose(-1, -2) # shape(1,84,6300) to shape(1,6300,84)
prediction[..., :4] = xywh2xyxy(prediction[..., :4]) # xywh to xyxy
t = time.time()
output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
for xi, x in enumerate(prediction): # image index, image inference
# Apply constraints
# x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height
x = x[xc[xi]] # confidence
# Cat apriori labels if autolabelling
if labels and len(labels[xi]):
lb = labels[xi]
v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
v[:, :4] = xywh2xyxy(lb[:, 1:5]) # box
v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls
x = torch.cat((x, v), 0)
# If none remain process next image
if not x.shape[0]:
continue
# Detections matrix nx6 (xyxy, conf, cls)
box, cls, mask = x.split((4, nc, nm), 1)
if multi_label:
i, j = torch.where(cls > conf_thres)
x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
else: # best class only
conf, j = cls.max(1, keepdim=True)
x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
# Filter by class
if classes is not None:
x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
# Check shape
n = x.shape[0] # number of boxes
if not n: # no boxes
continue
if n > max_nms: # excess boxes
x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence and remove excess boxes
# Batched NMS
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
i = i[:max_det] # limit detections
output[xi] = x[i]
if mps:
output[xi] = output[xi].to(device)
if (time.time() - t) > time_limit:
print(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
break # time limit exceeded
return output
def xywh2xyxy(x):
"""
Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
top-left corner and (x2, y2) is the bottom-right corner.
Args:
x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
Returns:
y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
"""
assert x.shape[-1] == 4, f'input shape last dimension expected 4 but input shape is {x.shape}'
y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy
dw = x[..., 2] / 2 # half-width
dh = x[..., 3] / 2 # half-height
y[..., 0] = x[..., 0] - dw # top left x
y[..., 1] = x[..., 1] - dh # top left y
y[..., 2] = x[..., 0] + dw # bottom right x
y[..., 3] = x[..., 1] + dh # bottom right y
return y
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True):
"""
Rescales bounding boxes (in the format of xyxy) from the shape of the image they were originally specified in
(img1_shape) to the shape of a different image (img0_shape).
Args:
img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2)
img0_shape (tuple): the shape of the target image, in the format of (height, width).
ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
calculated based on the size difference between the two images.
padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
rescaling.
Returns:
boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2)
"""
if ratio_pad is None: # calculate from img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
pad = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1), round(
(img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1) # wh padding
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
if padding:
boxes[..., [0, 2]] -= pad[0] # x padding
boxes[..., [1, 3]] -= pad[1] # y padding
boxes[..., :4] /= gain
clip_boxes(boxes, img0_shape)
return boxes
def clip_boxes(boxes, shape):
"""
Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.
Args:
boxes (torch.Tensor): the bounding boxes to clip
shape (tuple): the shape of the image
"""
if isinstance(boxes, torch.Tensor): # faster individually
boxes[..., 0].clamp_(0, shape[1]) # x1
boxes[..., 1].clamp_(0, shape[0]) # y1
boxes[..., 2].clamp_(0, shape[1]) # x2
boxes[..., 3].clamp_(0, shape[0]) # y2
else: # np.array (faster grouped)
boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2
boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
def process_mask(protos, masks_in, bboxes, shape, ori_shape):
"""
Crop after upsample.
proto_out: [mask_dim, mask_h, mask_w]
out_masks: [n, mask_dim], n is number of masks after nms
bboxes: [n, 4], n is number of masks after nms
shape:input_image_size, (h, w)
return: h, w, n
"""
# mask转换成自定义尺寸
c, mh, mw = protos.shape # CHW
masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW
# mask转换成原图尺寸
gain = min(shape[0] / ori_shape[0], shape[1] / ori_shape[1]) # gain = old / new
pad = (shape[1] - ori_shape[1] * gain) / 2, (shape[0] - ori_shape[0] * gain) / 2 # wh padding
top, left = int(pad[1]), int(pad[0]) # y, x
bottom, right = int(shape[0] - pad[1]), int(shape[1] - pad[0])
if len(masks.shape) < 2:
raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
masks = masks[:, top:bottom, left:right]
masks = F.interpolate(masks[None], ori_shape, mode='bilinear', align_corners=False)[0] # CHW
# 裁去box以外的图像
crop_masks = []
for i, mask in enumerate(masks):
mask = mask[int(bboxes[i][1]):int(bboxes[i][3]), int(bboxes[i][0]):int(bboxes[i][2])]
crop_masks.append(mask.gt_(0.5))
return crop_masks
def plot_result(det_cpu, dst_img, masks, category_names):
circle_max_contour = []
concrete_max_contour = []
for i, item in enumerate(det_cpu):
# rand_color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
# 画box
box_x1, box_y1, box_x2, box_y2 = item[0:4].astype(np.int32)
label = category_names[int(item[5])]
rand_color = (0, 255, 255)
#cv2.rectangle(dst_img, (box_x1, box_y1), (box_x2, box_y2), color=rand_color, thickness=2)
score = item[4]
org = (int((box_x1+box_x2)/2), int((box_y1+box_y2)/2))
text = '{}|{:.2f}'.format(label, score)
cv2.putText(dst_img, text, org=org, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.8, color=rand_color, thickness=2)
# 画mask
#mask = masks[i].cpu().numpy().astype(int)
mask = masks[i].cpu().data.numpy().astype(int)
#mask = masks[i].numpy().astype(int)
bbox_image = dst_img[box_y1:box_y2, box_x1:box_x2]
h, w = box_y2 - box_y1, box_x2 - box_x1
mask_colored = np.zeros((h, w, 3), dtype=np.uint8)
mask_colored[np.where(mask)] = rand_color
##################################
imgray = cv2.cvtColor(mask_colored, cv2.COLOR_BGR2GRAY)
# cv2.imshow('mask',imgray)
# cv2.waitKey(1)
# 2、二进制图像
ret, binary = cv2.threshold(imgray, 10, 255, 0)
# 阈值 二进制图像
# cv2.imshow('bin',binary)
# cv2.waitKey(1)
contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
max_contour = None
max_perimeter = 0
for contour in contours:
perimeter = cv2.arcLength(contour, True)
if perimeter > max_perimeter:
max_perimeter = perimeter
max_contour = contour
rect = cv2.minAreaRect(max_contour)
# cv2.boxPoints可以将轮廓点转换为四个角点坐标
box = cv2.boxPoints(rect)
# 这一步不影响后面的画图,但是可以保证四个角点坐标为顺时针
startidx = box.sum(axis=1).argmin()
box = np.roll(box, 4 - startidx, 0)
# 在原图上画出预测的外接矩形
box = box.reshape((-1, 1, 2)).astype(np.int32)
box = box + [[[box_x1, box_y1]], [[box_x1, box_y1]], [[box_x1, box_y1]], [[box_x1, box_y1]]]
cv2.polylines(dst_img, [box], True, (0, 255, 0), 2)
return dst_img
# cv2.imwrite('rs.jpg', dst_img)
class yolov8_segment():
def __init__(self):
super(yolov8_segment, self).__init__()
def load_model(self, model_path, device):
self.model = load_model(model_path, device)
self.device = device
def model_inference(self, frame, upd_arr):
img = data_preprocess(self.model, frame, [640, 640], self.device)
# 推理
ori_img = frame.copy()
result = self.model(img, augment=False)
preds = result[0]
proto = result[1][-1]
# NMS
det = non_max_suppression(preds, conf_thres=0.25, iou_thres=0.3, nc=len(self.model.CLASSES))[0]
if det.shape[0] != 0:
# bbox还原至原图尺寸
det[:, :4] = scale_boxes(img.shape[2:], det[:, :4], ori_img.shape)
# mask转换成原图尺寸并做裁剪
masks = process_mask(proto[0], det[:, 6:], det[:, :4], img.shape[2:], ori_img.shape[0:2])
category_names = self.model.CLASSES
# 画图
# result_frame = plot_result(det.cpu().data.numpy(), ori_img, masks, category_names)
return 1 , det.cpu().data.numpy(), ori_img, masks, category_names
else:
return 0 , None, None, None, None
def clear(self):
del self.model
# model = yolov8_segment()
# model.load_model('./pt_model/yolov8n-seg.pt','cpu')
# cap = cv2.VideoCapture(1)
# while True:
# # count_file = len(os.listdir('E:\\A_panckg\\cv_sdk_discharge\\video_save')) # 数量
# ret, frame = cap.read()
# if ret:
# frame_save_count = 1000
# frame = cv2.resize(frame, (1280, 720))
# img = model.model_inference(frame, 0)
# cv2.imshow("imgrr", img)
# cv2.waitKey(1)
# #videoWriter(img)