源码

2025-08-15 12:08:30 +08:00
parent dca51db4eb
commit eaa1cee17f
82 changed files with 3398316 additions and 554437 deletions
--- a/Vision/model/data/0925_01_28.png
+++ b/Vision/model/data/0925_01_28.png
--- a/Vision/model/data/2024_11_29_10_05_58.png
+++ b/Vision/model/data/2024_11_29_10_05_58.png
--- a/Vision/model/data/2024_11_29_10_05_58.xyz
+++ b/Vision/model/data/2024_11_29_10_05_58.xyz
--- a/Vision/model/data/2024_12_16_18_32_40.png
+++ b/Vision/model/data/2024_12_16_18_32_40.png
--- a/Vision/model/data/test0911.png
+++ b/Vision/model/data/test0911.png
--- a/Vision/model/data/test0911.xyz
+++ b/Vision/model/data/test0911.xyz
--- a/Vision/model/openvino/metadata.yaml
+++ b/Vision/model/openvino/metadata.yaml
@@ -0,0 +1,15 @@
+description: Ultralytics YOLOv8n-seg model trained on D:\work\ultralytics-main\ultralytics\cfg\datasets\coco8-seg-dy.yaml
+author: Ultralytics
+date: '2024-12-19T09:48:12.419566'
+version: 8.2.86
+license: AGPL-3.0 License (https://ultralytics.com/license)
+docs: https://docs.ultralytics.com
+stride: 32
+task: segment
+batch: 1
+imgsz:
+- 640
+- 640
+names:
+  0: zheng
+  1: fan
--- a/Vision/model/openvino/one_bag.bin
+++ b/Vision/model/openvino/one_bag.bin
--- a/Vision/model/openvino/one_bag.xml
+++ b/Vision/model/openvino/one_bag.xml
--- a/Vision/model/pt/bagNum.pt
+++ b/Vision/model/pt/bagNum.pt
--- a/Vision/model/pt/bag_collection.pt
+++ b/Vision/model/pt/bag_collection.pt
--- a/Vision/model/pt/best.pt
+++ b/Vision/model/pt/best.pt
--- a/Vision/model/pt/one_bag.pt
+++ b/Vision/model/pt/one_bag.pt
--- a/Vision/model/pt/person.pt
+++ b/Vision/model/pt/person.pt
--- a/Vision/model/pt/person_detect.pt
+++ b/Vision/model/pt/person_detect.pt
--- a/Vision/tool/CameraRVC.py
+++ b/Vision/tool/CameraRVC.py
@@ -1,149 +1,149 @@
-#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
-'''
-@Project ：my_work 
-@File    ：camera.py
-@IDE     ：PyCharm 
-@Author  ：hjw
-@Date    ：2024/8/13 11:34 
-'''
-import PyRVC as RVC
-import numpy as np
-
-class camera_rvc:
-
-    def __init__(self):
-        self.caminit_isok = False
-        RVC.SystemInit()
-
-        # Choose RVC X Camera type (USB, GigE or All)
-        opt = RVC.SystemListDeviceTypeEnum.GigE
-
-        # Scan all RVC X Camera devices.
-        ret, devices = RVC.SystemListDevices(opt)
-        print("RVC X Camera devices number:%d" % len(devices))
-
-        #  Find whether any RVC X Camera is connected or not.
-        if len(devices) == 0:
-            print("Can not find any RVC X Camera!")
-            RVC.SystemShutdown()
-        else:
-            print("devices size = %d" % len(devices))
-
-            # Create a RVC X Camera and choose use left side camera.
-            self.x = RVC.X1.Create(devices[0], RVC.CameraID_Left)
-            # x = RVC.X1.Create(devices[0], RVC.CameraID_Right)
-
-            # Test RVC X Camera is valid or not.
-            if self.x.IsValid() == True:
-                print("RVC X Camera is valid!")
-                # Open RVC X Camera.
-                ret1 = self.x.Open()
-                # Test RVC X Camera is opened or not.
-                if ret1 and self.x.IsOpen() == True:
-                    print("RVC X Camera is opened!")
-                    self.caminit_isok = True
-                else:
-                    print("RVC X Camera is not opened!")
-                    RVC.X1.Destroy(self.x)
-                    RVC.SystemShutdown()
-                    self.caminit_isok = False
-            else:
-                print("RVC X Camera is not valid!")
-                RVC.X1.Destroy(self.x)
-                RVC.SystemShutdown()
-                self.caminit_isok = False
-
-
-    def get_img(self):
-        ""
-        '''
-        :param api: None
-        :return: ret ,img 
-        '''
-        if self.caminit_isok == False:
-            return 0, None
-        else:
-            # Capture a point map and a image.
-            ret2 = self.x.Capture()
-            # Create saving address of image and point map.
-
-            if ret2 == True:
-                print("RVC X Camera capture successed!")
-
-                # Get image data and image size.
-                img = self.x.GetImage()
-                # Convert image to array and save it.
-                img = np.array(img, copy=False)
-                return 1, img
-            else:
-                print("RVC X Camera capture failed!")
-                self.x.Close()
-                RVC.X1.Destroy(self.x)
-                RVC.SystemShutdown()
-                return 0, None
-
-    def get_point_map(self):
-        ""
-        '''
-        :param api: None
-        :return: img 
-        '''
-        if self.caminit_isok == False:
-            return 0, None
-        else:
-            # Capture a point map and a image.
-            ret2 = self.x.Capture()
-            # Create saving address of image and point map.
-
-            if ret2 == True:
-                print("RVC X Camera capture successed!")
-                # Convert point map (m) to array and save it.
-                pm = np.array(self.x.GetPointMap(), copy=False)
-                return 1, pm
-            else:
-                print("RVC X Camera capture failed!")
-                self.x.Close()
-                RVC.X1.Destroy(self.x)
-                RVC.SystemShutdown()
-                return 0, None
-
-    def get_img_and_point_map(self):
-        ""
-        '''
-        :param api: None
-        :return: ret , img, point_map
-        '''
-        if self.caminit_isok == False:
-            return 0, None, None
-        else:
-            # Capture a point map and a image.
-            ret2 = self.x.Capture()
-            # Create saving address of image and point map.
-
-            if ret2 == True:
-                print("RVC X Camera capture successed!")
-
-                # Get image data and image size.
-                img = self.x.GetImage()
-                # Convert image to array and save it.
-                img = np.array(img, copy=False)
-
-                # Convert point map (m) to array and save it.
-                pm = np.array(self.x.GetPointMap(), copy=False)
-                return 1, img, pm
-            else:
-                print("RVC X Camera capture failed!")
-                self.x.Close()
-                RVC.X1.Destroy(self.x)
-                RVC.SystemShutdown()
-                return 0, None, None
-
-    def release(self):
-        if self.caminit_isok == False:
-            RVC.SystemShutdown()
-        else:
-            RVC.X1.Destroy(self.x)
-            RVC.SystemShutdown()
-
-
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''
+@Project ：my_work 
+@File    ：camera.py
+@IDE     ：PyCharm 
+@Author  ：hjw
+@Date    ：2024/8/13 11:34 
+'''
+import PyRVC as RVC
+import numpy as np
+
+class camera_rvc:
+
+    def __init__(self):
+        self.caminit_isok = False
+        RVC.SystemInit()
+
+        # Choose RVC X Camera type (USB, GigE or All)
+        opt = RVC.SystemListDeviceTypeEnum.GigE
+
+        # Scan all RVC X Camera devices.
+        ret, devices = RVC.SystemListDevices(opt)
+        print("RVC X Camera devices number:%d" % len(devices))
+
+        #  Find whether any RVC X Camera is connected or not.
+        if len(devices) == 0:
+            print("Can not find any RVC X Camera!")
+            RVC.SystemShutdown()
+        else:
+            print("devices size = %d" % len(devices))
+
+            # Create a RVC X Camera and choose use left side camera.
+            self.x = RVC.X1.Create(devices[0], RVC.CameraID_Left)
+            # x = RVC.X1.Create(devices[0], RVC.CameraID_Right)
+
+            # Test RVC X Camera is valid or not.
+            if self.x.IsValid() == True:
+                print("RVC X Camera is valid!")
+                # Open RVC X Camera.
+                ret1 = self.x.Open()
+                # Test RVC X Camera is opened or not.
+                if ret1 and self.x.IsOpen() == True:
+                    print("RVC X Camera is opened!")
+                    self.caminit_isok = True
+                else:
+                    print("RVC X Camera is not opened!")
+                    RVC.X1.Destroy(self.x)
+                    RVC.SystemShutdown()
+                    self.caminit_isok = False
+            else:
+                print("RVC X Camera is not valid!")
+                RVC.X1.Destroy(self.x)
+                RVC.SystemShutdown()
+                self.caminit_isok = False
+
+
+    def get_img(self):
+        ""
+        '''
+        :param api: None
+        :return: ret ,img 
+        '''
+        if self.caminit_isok == False:
+            return 0, None
+        else:
+            # Capture a point map and a image.
+            ret2 = self.x.Capture()
+            # Create saving address of image and point map.
+
+            if ret2 == True:
+                print("RVC X Camera capture successed!")
+
+                # Get image data and image size.
+                img = self.x.GetImage()
+                # Convert image to array and save it.
+                img = np.array(img, copy=False)
+                return 1, img
+            else:
+                print("RVC X Camera capture failed!")
+                self.x.Close()
+                RVC.X1.Destroy(self.x)
+                RVC.SystemShutdown()
+                return 0, None
+
+    def get_point_map(self):
+        ""
+        '''
+        :param api: None
+        :return: img 
+        '''
+        if self.caminit_isok == False:
+            return 0, None
+        else:
+            # Capture a point map and a image.
+            ret2 = self.x.Capture()
+            # Create saving address of image and point map.
+
+            if ret2 == True:
+                print("RVC X Camera capture successed!")
+                # Convert point map (m) to array and save it.
+                pm = np.array(self.x.GetPointMap(), copy=False)
+                return 1, pm
+            else:
+                print("RVC X Camera capture failed!")
+                self.x.Close()
+                RVC.X1.Destroy(self.x)
+                RVC.SystemShutdown()
+                return 0, None
+
+    def get_img_and_point_map(self):
+        ""
+        '''
+        :param api: None
+        :return: ret , img, point_map
+        '''
+        if self.caminit_isok == False:
+            return 0, None, None
+        else:
+            # Capture a point map and a image.
+            ret2 = self.x.Capture()
+            # Create saving address of image and point map.
+
+            if ret2 == True:
+                print("RVC X Camera capture successed!")
+
+                # Get image data and image size.
+                img = self.x.GetImage()
+                # Convert image to array and save it.
+                img = np.array(img, copy=False)
+
+                # Convert point map (m) to array and save it.
+                pm = np.array(self.x.GetPointMap(), copy=False)
+                return 1, img, pm
+            else:
+                print("RVC X Camera capture failed!")
+                self.x.Close()
+                RVC.X1.Destroy(self.x)
+                RVC.SystemShutdown()
+                return 0, None, None
+
+    def release(self):
+        if self.caminit_isok == False:
+            RVC.SystemShutdown()
+        else:
+            RVC.X1.Destroy(self.x)
+            RVC.SystemShutdown()
+
+
--- a/Vision/yolo/yolov8_pt_seg.py
+++ b/Vision/yolo/yolov8_pt_seg.py
@@ -1,365 +1,365 @@
-
-#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
-'''
-@Project -> File   ：yolov8_segment.py
-@IDE    ：PyCharm
-@Author ：hjw
-@Version : 1.0.0
-@Date   ：2024/8/20 9:25
-@Function   ：
-'''
-
-# yolov8 pt模型，实例分割推理
-import cv2
-import time
-import numpy as np
-import torch, torchvision
-import torch.nn.functional as F
-
-
-
-def load_model(model_path, device):
-    model = torch.load(model_path, map_location=device)
-    category_list = model.get('CLASSES', model.get('model').names)
-    model = (model.get('ema') or model['model']).float()  # FP32 model
-    model.__setattr__('CLASSES', category_list)
-    model.fuse().eval()
-    #model = model.cuda()
-    return model
-
-
-def data_preprocess(model, img, img_scale, device):
-    stride, auto = 32, True
-    stride = max(int(model.stride.max()), 32)
-    img = letterbox(img, new_shape=img_scale, stride=stride, auto=auto)[0]  # padded resize
-    img = np.ascontiguousarray(img.transpose((2, 0, 1))[::-1])  # HWC to CHW, BGR to RGB,contiguous
-    #img = torch.from_numpy(img) # ndarray to tensor
-    img = torch.from_numpy(img).to(device)
-    #img = torch.from_numpy(img)
-    img = img.float()  # uint8 to fp32
-    img /= 255  # 0 - 255 to 0.0 - 1.0
-    if len(img.shape) == 3:
-        img = img[None]  # expand for batch dim
-    return img
-
-
-def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
-    # Resize and pad image while meeting stride-multiple constraints
-    shape = im.shape[:2]  # current shape [height, width]
-    if isinstance(new_shape, int):
-        new_shape = (new_shape, new_shape)
-
-    # Scale ratio (new / old)
-    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
-    if not scaleup:  # only scale down, do not scale up (for better val mAP)
-        r = min(r, 1.0)
-
-    # Compute padding
-    ratio = r, r  # width, height ratios
-    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
-    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
-    if auto:  # minimum rectangle
-        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
-    elif scaleFill:  # stretch
-        dw, dh = 0.0, 0.0
-        new_unpad = (new_shape[1], new_shape[0])
-        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios
-
-    dw /= 2  # divide padding into 2 sides
-    dh /= 2
-
-    if shape[::-1] != new_unpad:  # resize
-        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
-    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
-    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
-    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
-    return im, ratio, (dw, dh)
-
-
-def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
-                        labels=(), max_det=300, nc=0, max_time_img=0.05, max_nms=30000, max_wh=7680, ):
-    # Checks
-    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
-    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
-    if isinstance(prediction, (list, tuple)):  # YOLOv8 model in validation model, output = (inference_out, loss_out)
-        prediction = prediction[0]  # select only inference output
-
-    device = prediction.device
-    mps = 'mps' in device.type  # Apple MPS
-    if mps:  # MPS not fully supported yet, convert tensors to CPU before NMS
-        prediction = prediction.cpu()
-    bs = prediction.shape[0]  # batch size
-    nc = nc or (prediction.shape[1] - 4)  # number of classes
-    nm = prediction.shape[1] - nc - 4
-    mi = 4 + nc  # mask start index
-    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates
-
-    # Settings
-    # min_wh = 2  # (pixels) minimum box width and height
-    time_limit = 0.5 + max_time_img * bs  # seconds to quit after
-    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
-
-    prediction = prediction.transpose(-1, -2)  # shape(1,84,6300) to shape(1,6300,84)
-    prediction[..., :4] = xywh2xyxy(prediction[..., :4])  # xywh to xyxy
-
-    t = time.time()
-    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
-    for xi, x in enumerate(prediction):  # image index, image inference
-        # Apply constraints
-        # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0  # width-height
-        x = x[xc[xi]]  # confidence
-
-        # Cat apriori labels if autolabelling
-        if labels and len(labels[xi]):
-            lb = labels[xi]
-            v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
-            v[:, :4] = xywh2xyxy(lb[:, 1:5])  # box
-            v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
-            x = torch.cat((x, v), 0)
-
-        # If none remain process next image
-        if not x.shape[0]:
-            continue
-
-        # Detections matrix nx6 (xyxy, conf, cls)
-        box, cls, mask = x.split((4, nc, nm), 1)
-
-        if multi_label:
-            i, j = torch.where(cls > conf_thres)
-            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
-        else:  # best class only
-            conf, j = cls.max(1, keepdim=True)
-            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
-
-        # Filter by class
-        if classes is not None:
-            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
-
-        # Check shape
-        n = x.shape[0]  # number of boxes
-        if not n:  # no boxes
-            continue
-        if n > max_nms:  # excess boxes
-            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence and remove excess boxes
-
-        # Batched NMS
-        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
-        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
-        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
-        i = i[:max_det]  # limit detections
-
-        output[xi] = x[i]
-        if mps:
-            output[xi] = output[xi].to(device)
-        if (time.time() - t) > time_limit:
-            print(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
-            break  # time limit exceeded
-    return output
-
-
-def xywh2xyxy(x):
-    """
-    Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
-    top-left corner and (x2, y2) is the bottom-right corner.
-    Args:
-        x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
-    Returns:
-        y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
-    """
-    assert x.shape[-1] == 4, f'input shape last dimension expected 4 but input shape is {x.shape}'
-    y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x)  # faster than clone/copy
-    dw = x[..., 2] / 2  # half-width
-    dh = x[..., 3] / 2  # half-height
-    y[..., 0] = x[..., 0] - dw  # top left x
-    y[..., 1] = x[..., 1] - dh  # top left y
-    y[..., 2] = x[..., 0] + dw  # bottom right x
-    y[..., 3] = x[..., 1] + dh  # bottom right y
-    return y
-
-
-def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True):
-    """
-    Rescales bounding boxes (in the format of xyxy) from the shape of the image they were originally specified in
-    (img1_shape) to the shape of a different image (img0_shape).
-    Args:
-        img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
-        boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2)
-        img0_shape (tuple): the shape of the target image, in the format of (height, width).
-        ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
-            calculated based on the size difference between the two images.
-        padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
-            rescaling.
-    Returns:
-        boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2)
-    """
-    if ratio_pad is None:  # calculate from img0_shape
-        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
-        pad = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1), round(
-            (img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1)  # wh padding
-    else:
-        gain = ratio_pad[0][0]
-        pad = ratio_pad[1]
-
-    if padding:
-        boxes[..., [0, 2]] -= pad[0]  # x padding
-        boxes[..., [1, 3]] -= pad[1]  # y padding
-    boxes[..., :4] /= gain
-    clip_boxes(boxes, img0_shape)
-    return boxes
-
-
-def clip_boxes(boxes, shape):
-    """
-    Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.
-
-    Args:
-      boxes (torch.Tensor): the bounding boxes to clip
-      shape (tuple): the shape of the image
-    """
-    if isinstance(boxes, torch.Tensor):  # faster individually
-        boxes[..., 0].clamp_(0, shape[1])  # x1
-        boxes[..., 1].clamp_(0, shape[0])  # y1
-        boxes[..., 2].clamp_(0, shape[1])  # x2
-        boxes[..., 3].clamp_(0, shape[0])  # y2
-    else:  # np.array (faster grouped)
-        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
-        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
-
-
-def process_mask(protos, masks_in, bboxes, shape, ori_shape):
-    """
-    Crop after upsample.
-    proto_out: [mask_dim, mask_h, mask_w]
-    out_masks: [n, mask_dim], n is number of masks after nms
-    bboxes: [n, 4], n is number of masks after nms
-    shape:input_image_size, (h, w)
-
-    return: h, w, n
-    """
-    # mask转换成自定义尺寸
-    c, mh, mw = protos.shape  # CHW
-    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
-    masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
-    # mask转换成原图尺寸
-    gain = min(shape[0] / ori_shape[0], shape[1] / ori_shape[1])  # gain  = old / new
-    pad = (shape[1] - ori_shape[1] * gain) / 2, (shape[0] - ori_shape[0] * gain) / 2  # wh padding
-    top, left = int(pad[1]), int(pad[0])  # y, x
-    bottom, right = int(shape[0] - pad[1]), int(shape[1] - pad[0])
-    if len(masks.shape) < 2:
-        raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
-    masks = masks[:, top:bottom, left:right]
-    masks = F.interpolate(masks[None], ori_shape, mode='bilinear', align_corners=False)[0]  # CHW
-    # 裁去box以外的图像
-    crop_masks = []
-    for i, mask in enumerate(masks):
-        mask = mask[int(bboxes[i][1]):int(bboxes[i][3]), int(bboxes[i][0]):int(bboxes[i][2])]
-        crop_masks.append(mask.gt_(0.5))
-    return crop_masks
-
-
-def plot_result(det_cpu, dst_img, masks, category_names):
-    circle_max_contour = []
-    concrete_max_contour = []
-    for i, item in enumerate(det_cpu):
-        # rand_color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
-        # 画box
-        box_x1, box_y1, box_x2, box_y2 = item[0:4].astype(np.int32)
-        label = category_names[int(item[5])]
-        rand_color = (0, 255, 255)
-        #cv2.rectangle(dst_img, (box_x1, box_y1), (box_x2, box_y2), color=rand_color, thickness=2)
-        score = item[4]
-        org = (int((box_x1+box_x2)/2), int((box_y1+box_y2)/2))
-        text = '{}|{:.2f}'.format(label, score)
-        cv2.putText(dst_img, text, org=org, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.8, color=rand_color, thickness=2)
-        # 画mask
-        #mask = masks[i].cpu().numpy().astype(int)
-        mask = masks[i].cpu().data.numpy().astype(int)
-        #mask = masks[i].numpy().astype(int)
-        bbox_image = dst_img[box_y1:box_y2, box_x1:box_x2]
-        h, w = box_y2 - box_y1, box_x2 - box_x1
-        mask_colored = np.zeros((h, w, 3), dtype=np.uint8)
-        mask_colored[np.where(mask)] = rand_color
-        ##################################
-        imgray = cv2.cvtColor(mask_colored, cv2.COLOR_BGR2GRAY)
-        # cv2.imshow('mask',imgray)
-        # cv2.waitKey(1)
-        # 2、二进制图像
-        ret, binary = cv2.threshold(imgray, 10, 255, 0)
-        # 阈值 二进制图像
-        # cv2.imshow('bin',binary)
-        # cv2.waitKey(1)
-        contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
-        max_contour = None
-        max_perimeter = 0
-        for contour in contours:
-            perimeter = cv2.arcLength(contour, True)
-            if perimeter > max_perimeter:
-                max_perimeter = perimeter
-                max_contour = contour
-        rect = cv2.minAreaRect(max_contour)
-        # cv2.boxPoints可以将轮廓点转换为四个角点坐标
-        box = cv2.boxPoints(rect)
-        # 这一步不影响后面的画图，但是可以保证四个角点坐标为顺时针
-        startidx = box.sum(axis=1).argmin()
-        box = np.roll(box, 4 - startidx, 0)
-        # 在原图上画出预测的外接矩形
-        box = box.reshape((-1, 1, 2)).astype(np.int32)
-        box = box + [[[box_x1, box_y1]], [[box_x1, box_y1]], [[box_x1, box_y1]], [[box_x1, box_y1]]]
-        cv2.polylines(dst_img, [box], True, (0, 255, 0), 2)
-
-    return dst_img
-    # cv2.imwrite('rs.jpg', dst_img)
-
-
-class yolov8_segment():
-    def __init__(self):
-        super(yolov8_segment, self).__init__()
-
-
-    def load_model(self, model_path, device):
-        self.model = load_model(model_path, device)
-        self.device = device
-
-    def model_inference(self, frame, upd_arr):
-        img = data_preprocess(self.model, frame, [640, 640], self.device)
-
-        # 推理
-        ori_img = frame.copy()
-        result = self.model(img, augment=False)
-        preds = result[0]
-        proto = result[1][-1]
-        # NMS
-        det = non_max_suppression(preds, conf_thres=0.4, iou_thres=0.4, nc=len(self.model.CLASSES))[0]
-        if det.shape[0] != 0:
-            # bbox还原至原图尺寸
-            det[:, :4] = scale_boxes(img.shape[2:], det[:, :4], ori_img.shape)
-            # mask转换成原图尺寸并做裁剪
-            masks = process_mask(proto[0], det[:, 6:], det[:, :4], img.shape[2:], ori_img.shape[0:2])
-            category_names = self.model.CLASSES
-            # 画图
-            # result_frame = plot_result(det.cpu().data.numpy(), ori_img, masks, category_names)
-            return 1 , det.cpu().data.numpy(), ori_img, masks, category_names
-        else:
-            return 0 , None, None, None, None
-
-    def clear(self):
-        del self.model
-
-# model = yolov8_segment()
-# model.load_model('./pt_model/yolov8n-seg.pt','cpu')
-# cap = cv2.VideoCapture(1)
-# while True:
-#     # count_file = len(os.listdir('E:\\A_panckg\\cv_sdk_discharge\\video_save'))  # 数量
-#     ret, frame = cap.read()
-#     if ret:
-#         frame_save_count = 1000
-#         frame = cv2.resize(frame, (1280, 720))
-#         img = model.model_inference(frame, 0)
-#         cv2.imshow("imgrr", img)
-#         cv2.waitKey(1)
-#         #videoWriter(img)
-
-
-
-
+
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''
+@Project -> File   ：yolov8_segment.py
+@IDE    ：PyCharm
+@Author ：hjw
+@Version : 1.0.0
+@Date   ：2024/8/20 9:25
+@Function   ：
+'''
+
+# yolov8 pt模型，实例分割推理
+import cv2
+import time
+import numpy as np
+import torch, torchvision
+import torch.nn.functional as F
+
+
+
+def load_model(model_path, device):
+    model = torch.load(model_path, map_location=device)
+    category_list = model.get('CLASSES', model.get('model').names)
+    model = (model.get('ema') or model['model']).float()  # FP32 model
+    model.__setattr__('CLASSES', category_list)
+    model.fuse().eval()
+    #model = model.cuda()
+    return model
+
+
+def data_preprocess(model, img, img_scale, device):
+    stride, auto = 32, True
+    stride = max(int(model.stride.max()), 32)
+    img = letterbox(img, new_shape=img_scale, stride=stride, auto=auto)[0]  # padded resize
+    img = np.ascontiguousarray(img.transpose((2, 0, 1))[::-1])  # HWC to CHW, BGR to RGB,contiguous
+    #img = torch.from_numpy(img) # ndarray to tensor
+    img = torch.from_numpy(img).to(device)
+    #img = torch.from_numpy(img)
+    img = img.float()  # uint8 to fp32
+    img /= 255  # 0 - 255 to 0.0 - 1.0
+    if len(img.shape) == 3:
+        img = img[None]  # expand for batch dim
+    return img
+
+
+def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
+    # Resize and pad image while meeting stride-multiple constraints
+    shape = im.shape[:2]  # current shape [height, width]
+    if isinstance(new_shape, int):
+        new_shape = (new_shape, new_shape)
+
+    # Scale ratio (new / old)
+    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+    if not scaleup:  # only scale down, do not scale up (for better val mAP)
+        r = min(r, 1.0)
+
+    # Compute padding
+    ratio = r, r  # width, height ratios
+    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+    if auto:  # minimum rectangle
+        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
+    elif scaleFill:  # stretch
+        dw, dh = 0.0, 0.0
+        new_unpad = (new_shape[1], new_shape[0])
+        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios
+
+    dw /= 2  # divide padding into 2 sides
+    dh /= 2
+
+    if shape[::-1] != new_unpad:  # resize
+        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
+    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
+    return im, ratio, (dw, dh)
+
+
+def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
+                        labels=(), max_det=300, nc=0, max_time_img=0.05, max_nms=30000, max_wh=7680, ):
+    # Checks
+    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
+    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
+    if isinstance(prediction, (list, tuple)):  # YOLOv8 model in validation model, output = (inference_out, loss_out)
+        prediction = prediction[0]  # select only inference output
+
+    device = prediction.device
+    mps = 'mps' in device.type  # Apple MPS
+    if mps:  # MPS not fully supported yet, convert tensors to CPU before NMS
+        prediction = prediction.cpu()
+    bs = prediction.shape[0]  # batch size
+    nc = nc or (prediction.shape[1] - 4)  # number of classes
+    nm = prediction.shape[1] - nc - 4
+    mi = 4 + nc  # mask start index
+    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates
+
+    # Settings
+    # min_wh = 2  # (pixels) minimum box width and height
+    time_limit = 0.5 + max_time_img * bs  # seconds to quit after
+    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
+
+    prediction = prediction.transpose(-1, -2)  # shape(1,84,6300) to shape(1,6300,84)
+    prediction[..., :4] = xywh2xyxy(prediction[..., :4])  # xywh to xyxy
+
+    t = time.time()
+    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
+    for xi, x in enumerate(prediction):  # image index, image inference
+        # Apply constraints
+        # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0  # width-height
+        x = x[xc[xi]]  # confidence
+
+        # Cat apriori labels if autolabelling
+        if labels and len(labels[xi]):
+            lb = labels[xi]
+            v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
+            v[:, :4] = xywh2xyxy(lb[:, 1:5])  # box
+            v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
+            x = torch.cat((x, v), 0)
+
+        # If none remain process next image
+        if not x.shape[0]:
+            continue
+
+        # Detections matrix nx6 (xyxy, conf, cls)
+        box, cls, mask = x.split((4, nc, nm), 1)
+
+        if multi_label:
+            i, j = torch.where(cls > conf_thres)
+            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
+        else:  # best class only
+            conf, j = cls.max(1, keepdim=True)
+            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
+
+        # Filter by class
+        if classes is not None:
+            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
+
+        # Check shape
+        n = x.shape[0]  # number of boxes
+        if not n:  # no boxes
+            continue
+        if n > max_nms:  # excess boxes
+            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence and remove excess boxes
+
+        # Batched NMS
+        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
+        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
+        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
+        i = i[:max_det]  # limit detections
+
+        output[xi] = x[i]
+        if mps:
+            output[xi] = output[xi].to(device)
+        if (time.time() - t) > time_limit:
+            print(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
+            break  # time limit exceeded
+    return output
+
+
+def xywh2xyxy(x):
+    """
+    Convert boxes from centre format (cx, cy, width, height) to corner format (x1, y1, x2, y2), where (x1, y1) is
+    the top-left corner and (x2, y2) is the bottom-right corner.
+    Args:
+        x (np.ndarray | torch.Tensor): Boxes whose last dimension holds (cx, cy, width, height).
+    Returns:
+        (np.ndarray | torch.Tensor): A new container shaped like the input holding (x1, y1, x2, y2); the input
+            itself is not modified.
+    """
+    assert x.shape[-1] == 4, f'input shape last dimension expected 4 but input shape is {x.shape}'
+    # Allocate uninitialised output; every element is overwritten below.
+    if isinstance(x, torch.Tensor):
+        corners = torch.empty_like(x)
+    else:
+        corners = np.empty_like(x)
+    centers = x[..., :2]  # (cx, cy)
+    half_sizes = x[..., 2:] / 2  # (width / 2, height / 2)
+    corners[..., :2] = centers - half_sizes  # top-left (x1, y1)
+    corners[..., 2:] = centers + half_sizes  # bottom-right (x2, y2)
+    return corners
+
+
+def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True):
+    """
+    Map xyxy boxes from the coordinate frame of one image (img1_shape) to that of another image (img0_shape).
+
+    The boxes are modified in place and the same object is returned.
+    Args:
+        img1_shape (tuple): Shape (height, width) of the image the boxes currently refer to.
+        boxes (torch.Tensor): Boxes in (x1, y1, x2, y2) format.
+        img0_shape (tuple): Shape of the target image; only the first two entries (height, width) are read.
+        ratio_pad (tuple): Optional (ratio, pad) pair; ratio_pad[0][0] is used as the gain and ratio_pad[1] as the
+            (x, y) padding. When omitted both are derived from the two shapes.
+        padding (bool): If True the padding offset is subtracted before rescaling; if False only the gain is
+            applied.
+    Returns:
+        boxes (torch.Tensor): The rescaled boxes in (x1, y1, x2, y2) format, clipped to img0_shape.
+    """
+    if ratio_pad is not None:
+        gain = ratio_pad[0][0]
+        pad = ratio_pad[1]
+    else:
+        # Derive the resize gain and the per-side padding from the two shapes.
+        src_h, src_w = img1_shape[0], img1_shape[1]
+        dst_h, dst_w = img0_shape[0], img0_shape[1]
+        gain = min(src_h / dst_h, src_w / dst_w)  # gain = old / new
+        pad_x = round((src_w - dst_w * gain) / 2 - 0.1)
+        pad_y = round((src_h - dst_h * gain) / 2 - 0.1)
+        pad = pad_x, pad_y  # wh padding
+
+    if padding:
+        boxes[..., [0, 2]] -= pad[0]  # remove horizontal padding from x1, x2
+        boxes[..., [1, 3]] -= pad[1]  # remove vertical padding from y1, y2
+    boxes[..., :4] /= gain
+    clip_boxes(boxes, img0_shape)
+    return boxes
+
+
+def clip_boxes(boxes, shape):
+    """
+    Clip xyxy boxes in place so they lie inside an image of the given shape.
+
+    Args:
+      boxes (torch.Tensor | np.ndarray): Boxes to clip; the last dimension starts with (x1, y1, x2, y2).
+      shape (tuple): Image shape; shape[0] is the height limit and shape[1] the width limit.
+    """
+    height, width = shape[0], shape[1]
+    if not isinstance(boxes, torch.Tensor):
+        # np.array: clip both x columns and both y columns in one grouped operation each
+        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, width)  # x1, x2
+        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, height)  # y1, y2
+        return
+    # torch.Tensor: clamp each coordinate column individually, in place
+    for column, upper in enumerate((width, height, width, height)):  # x1, y1, x2, y2
+        boxes[..., column].clamp_(0, upper)
+
+
+def process_mask(protos, masks_in, bboxes, shape, ori_shape):
+    """
+    Build per-detection masks at original-image resolution and crop each one to its bounding box.
+
+    Args:
+        protos (torch.Tensor): Mask prototypes, [mask_dim, mask_h, mask_w].
+        masks_in (torch.Tensor): Mask coefficients, [n, mask_dim], n is the number of detections after NMS.
+        bboxes (torch.Tensor): Boxes as (x1, y1, x2, y2), [n, 4]; they index the ori_shape-sized masks directly.
+        shape (tuple): Network input size (h, w).
+        ori_shape (tuple): Original image size (h, w).
+    Returns:
+        (list[torch.Tensor]): n masks, each cropped to its box and thresholded in place at 0.5 (values 0 or 1).
+    """
+    # Combine prototypes with the coefficients, then upsample to the network input size
+    channels, proto_h, proto_w = protos.shape  # CHW
+    flat_protos = protos.float().view(channels, -1)
+    masks = (masks_in @ flat_protos).sigmoid().view(-1, proto_h, proto_w)
+    masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
+
+    # Strip the padding border, then resize to the original image size
+    in_h, in_w = shape[0], shape[1]
+    ori_h, ori_w = ori_shape[0], ori_shape[1]
+    gain = min(in_h / ori_h, in_w / ori_w)  # gain = old / new
+    pad_w = (in_w - ori_w * gain) / 2
+    pad_h = (in_h - ori_h * gain) / 2
+    top, left = int(pad_h), int(pad_w)  # y, x
+    bottom, right = int(in_h - pad_h), int(in_w - pad_w)
+    if len(masks.shape) < 2:
+        raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
+    masks = masks[:, top:bottom, left:right]
+    masks = F.interpolate(masks[None], ori_shape, mode='bilinear', align_corners=False)[0]  # CHW
+
+    # Keep only the region inside each detection's box
+    crop_masks = []
+    for i, mask in enumerate(masks):
+        bbox = bboxes[i]
+        y1, y2 = int(bbox[1]), int(bbox[3])
+        x1, x2 = int(bbox[0]), int(bbox[2])
+        cropped = mask[y1:y2, x1:x2]
+        crop_masks.append(cropped.gt_(0.5))
+    return crop_masks
+
+
+def plot_result(det_cpu, dst_img, masks, category_names):
+    """
+    Annotate an image with the label text and the rotated minimum-area rectangle of every detection.
+
+    Args:
+        det_cpu (np.ndarray): Detections, one row per object, read as (x1, y1, x2, y2, score, class_index, ...).
+        dst_img (np.ndarray): Image to draw on; it is modified in place.
+        masks (Sequence[torch.Tensor]): One mask per detection, already cropped to that detection's box;
+            non-zero entries (after integer truncation) mark object pixels.
+        category_names: Container mapping an integer class index to its display name.
+    Returns:
+        dst_img (np.ndarray): The same image object that was passed in, with the annotations drawn.
+
+    Detections whose mask is empty or contains no contour still get their label text, but the rectangle is
+    skipped instead of crashing inside OpenCV.
+    """
+    text_color = (0, 255, 255)
+    for i, item in enumerate(det_cpu):
+        box_x1, box_y1, box_x2, box_y2 = item[0:4].astype(np.int32)
+        label = category_names[int(item[5])]
+        score = item[4]
+        # Label text is anchored at the centre of the box
+        org = (int((box_x1 + box_x2) / 2), int((box_y1 + box_y2) / 2))
+        text = '{}|{:.2f}'.format(label, score)
+        cv2.putText(dst_img, text, org=org, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.8, color=text_color,
+                    thickness=2)
+
+        # Binary (0/255) image of the cropped mask, in box-local coordinates
+        mask = masks[i].cpu().data.numpy().astype(int)
+        if mask.size == 0:  # zero-area box: nothing to outline
+            continue
+        binary = np.where(mask != 0, 255, 0).astype(np.uint8)
+
+        # Outline the contour with the longest perimeter
+        contours, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
+        max_contour = max(contours, key=lambda contour: cv2.arcLength(contour, True), default=None)
+        if max_contour is None:  # mask has no foreground pixels
+            continue
+        rect = cv2.minAreaRect(max_contour)
+        # cv2.boxPoints turns the rotated rect into its four corner points
+        box = cv2.boxPoints(rect)
+        # Rotate the corner order so the corner with the smallest x + y comes first (does not affect drawing)
+        startidx = box.sum(axis=1).argmin()
+        box = np.roll(box, 4 - startidx, 0)
+        box = box.reshape((-1, 1, 2)).astype(np.int32)
+        # Shift from box-local coordinates back to full-image coordinates
+        box = box + np.array([box_x1, box_y1], dtype=np.int32)
+        cv2.polylines(dst_img, [box], True, (0, 255, 0), 2)
+
+    return dst_img
+
+
+class yolov8_segment():
+    """Wrapper that loads a YOLOv8 segmentation model and runs inference on single frames."""
+
+    def __init__(self):
+        super().__init__()
+
+    def load_model(self, model_path, device):
+        """
+        Load the model through the module-level load_model helper and remember the device.
+
+        Args:
+            model_path: Path of the model file, forwarded unchanged to load_model.
+            device: Device specifier, forwarded to load_model and reused for preprocessing.
+        """
+        self.model = load_model(model_path, device)
+        self.device = device
+
+    def model_inference(self, frame, upd_arr):
+        """
+        Run segmentation on one frame.
+
+        Args:
+            frame (np.ndarray): Input image; assumed to be (H, W, C) since shape[0:2] is used as (h, w).
+            upd_arr: Unused; kept so existing callers keep working.
+        Returns:
+            (tuple): (1, detections, image, masks, class_names) when at least one object survives NMS, where
+                detections is a numpy array with boxes rescaled to the original frame, image is a copy of
+                frame, masks is the list returned by process_mask and class_names is model.CLASSES;
+                (0, None, None, None, None) otherwise.
+        """
+        img = data_preprocess(self.model, frame, [640, 640], self.device)
+
+        ori_img = frame.copy()
+        # Inference only: skip autograd bookkeeping so no graph is kept alive for the outputs
+        with torch.no_grad():
+            result = self.model(img, augment=False)
+        preds = result[0]
+        proto = result[1][-1]
+        # NMS
+        det = non_max_suppression(preds, conf_thres=0.4, iou_thres=0.4, nc=len(self.model.CLASSES))[0]
+        if det.shape[0] == 0:
+            return 0, None, None, None, None
+
+        # Rescale boxes to the original frame size
+        det[:, :4] = scale_boxes(img.shape[2:], det[:, :4], ori_img.shape)
+        # Resize masks to the original frame size and crop them to their boxes
+        masks = process_mask(proto[0], det[:, 6:], det[:, :4], img.shape[2:], ori_img.shape[0:2])
+        category_names = self.model.CLASSES
+        return 1, det.cpu().data.numpy(), ori_img, masks, category_names
+
+    def clear(self):
+        """Drop the reference to the loaded model; safe to call when no model is loaded."""
+        if hasattr(self, 'model'):
+            del self.model
+
+# model = yolov8_segment()
+# model.load_model('./pt_model/yolov8n-seg.pt','cpu')
+# cap = cv2.VideoCapture(1)
+# while True:
+#     # count_file = len(os.listdir('E:\\A_panckg\\cv_sdk_discharge\\video_save'))  # 数量
+#     ret, frame = cap.read()
+#     if ret:
+#         frame_save_count = 1000
+#         frame = cv2.resize(frame, (1280, 720))
+#         img = model.model_inference(frame, 0)
+#         cv2.imshow("imgrr", img)
+#         cv2.waitKey(1)
+#         #videoWriter(img)
+
+
+
+