"""ROI-based classification helper for an RKNN-Lite model.

The entry point is :func:`myFunc`: it flips the incoming frame vertically,
crops a fixed region of interest (annotated at ``ORIG_W`` x ``ORIG_H`` and
rescaled to the actual frame size), runs the classifier on the crop and
returns the flipped frame with the ROI box and the predicted label drawn on it.
"""

import cv2
import numpy as np

# ---------------------------
# Frozen configuration
# ---------------------------

# Display name for each class index produced by the model.
CLASS_NAMES = {
    0: "0",
    1: "1",
    2: "2",
    3: "3",
    4: "4",
}

# ROI as (x, y, w, h), annotated on the *flipped* image at the original
# resolution (ORIG_W x ORIG_H below).
FIXED_ROI = (445, 540, 931, 319)

# Resolution the ROI annotation refers to.
ORIG_W = 1920
ORIG_H = 1080

# Expected size of the incoming frames (stated to be fixed at 640x640).
# NOTE(review): these two constants are not read by any function in this
# module (the actual frame shape is used instead); kept for external users.
CUR_W = 640
CUR_H = 640

# Model input settings.
IMG_SIZE = 640
RESIZE_MODE = "stretch"
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"

# Parameters of the weighted class-1 / class-2 decision.
WEIGHT_THRESHOLD = 0.4
W1, W2 = 0.3, 0.7


# ---------------------------
# Basic helpers
# ---------------------------

def resize_stretch(image, size=IMG_SIZE):
    """Resize *image* to ``size`` x ``size`` without preserving aspect ratio."""
    return cv2.resize(image, (size, size))


def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """Turn an image into a batched float32 tensor for inference.

    Args:
        img: Source image; treated as BGR when ``to_rgb`` is true.
        size: Side length of the square model input.
        resize_mode: ``"letterbox"`` is not implemented; any other value
            selects a plain stretch resize.
        to_rgb: Convert BGR to RGB after resizing.
        normalize: Divide pixel values by 255.
        layout: ``"NHWC"`` keeps channels last; any other value yields NCHW.

    Returns:
        A float32 array with a leading batch axis of length 1.

    Raises:
        NotImplementedError: If ``resize_mode`` is ``"letterbox"``.
    """
    if resize_mode == "letterbox":
        raise NotImplementedError("letterbox resize is not implemented")

    resized = resize_stretch(img, size=size)
    if to_rgb:
        resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    # astype() returns a fresh array, so the in-place division below never
    # touches the caller's image.
    img_f = resized.astype(np.float32)
    if normalize:
        img_f /= 255.0

    if layout != "NHWC":
        img_f = np.transpose(img_f, (2, 0, 1))

    # Already float32: no second astype() copy is needed.
    return np.expand_dims(img_f, axis=0)


def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """Choose between the two pile labels from the class-1 and class-2 scores.

    The score is the weighted mean ``(w1*p1 + w2*p2) / (p1 + p2)`` where
    ``p1 = pred[1]`` and ``p2 = pred[2]``; it is 0.0 when ``p1 + p2`` is not
    positive.

    Returns:
        ``(label, score, p1, p2)`` where ``label`` is "大堆料" (large pile)
        when ``score >= threshold`` and "小堆料" (small pile) otherwise.
    """
    p1, p2 = float(pred[1]), float(pred[2])
    total = p1 + p2
    score = (w1 * p1 + w2 * p2) / total if total > 0 else 0.0
    label = "大堆料" if score >= threshold else "小堆料"
    return label, score, p1, p2


# ---------------------------
# ROI scaling (core)
# ---------------------------

def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """Rescale an ROI annotated at ``orig_w`` x ``orig_h`` to *img*'s size.

    Args:
        img: Image whose ``shape[:2]`` is ``(height, width)``.
        orig_roi: ``(x, y, w, h)`` in the original resolution.
        orig_w: Width of the resolution the ROI was annotated on.
        orig_h: Height of the resolution the ROI was annotated on.

    Returns:
        ``(x, y, w, h)`` in *img* coordinates, each truncated to ``int``.
    """
    x, y, w, h = orig_roi
    cur_h, cur_w = img.shape[:2]

    scale_x = cur_w / orig_w
    scale_y = cur_h / orig_h

    return (
        int(x * scale_x),
        int(y * scale_y),
        int(w * scale_x),
        int(h * scale_y),
    )


# ---------------------------
# Main inference entry point
# ---------------------------

def myFunc(rknn_lite, IMG):
    """Classify the fixed ROI of *IMG* and return an annotated frame.

    Args:
        rknn_lite: Object exposing ``inference(list_of_inputs)`` whose first
            output can be flattened into per-class scores (presumably an
            RKNN-Lite runtime instance; confirm against the caller).
        IMG: Input frame as a NumPy image array.

    Returns:
        A copy of the vertically flipped frame with the ROI rectangle and the
        predicted label drawn on it.

    Raises:
        ValueError: If *IMG* is ``None`` or empty, or if the scaled ROI does
            not overlap the frame.
    """
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")

    # 1) The ROI was annotated at ORIG_W x ORIG_H, so rescale it to the
    #    resolution of the frame we actually received.
    x, y, w, h = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)

    # 2) The ROI refers to the flipped image; flip code 0 flips vertically.
    flipped_img = cv2.flip(IMG, 0)

    # 3) Crop the scaled ROI from the flipped frame.
    roi_img = flipped_img[y:y + h, x:x + w]
    if roi_img.size == 0:
        # Fail early with a readable error instead of an OpenCV assertion
        # inside cv2.resize.
        raise ValueError("ROI 裁剪结果为空,请检查 FIXED_ROI 与输入图像尺寸")

    # 4) Build the model input.
    input_tensor = preprocess_image_for_rknn(
        roi_img,
        size=IMG_SIZE,
        resize_mode=RESIZE_MODE,
        to_rgb=TO_RGB,
        normalize=NORMALIZE,
        layout=LAYOUT,
    )
    # A transposed (NCHW) tensor is not C-contiguous; this copies only when
    # the dtype or memory layout actually requires it.
    input_tensor = np.ascontiguousarray(input_tensor, dtype=np.float32)

    # 5) Inference.
    outputs = rknn_lite.inference([input_tensor])
    pred = outputs[0].reshape(-1).astype(float)
    class_id = int(np.argmax(pred))

    # Classes 1 and 2 are resolved with the weighted score rather than the
    # raw argmax; every other class uses its configured display name.
    if class_id in (1, 2):
        final_class, _, _, _ = weighted_small_large(pred)
    else:
        final_class = CLASS_NAMES.get(class_id, f"未知({class_id})")

    # 6) Visualisation, drawn on a copy of the flipped frame.
    vis_img = flipped_img.copy()
    cv2.rectangle(vis_img, (x, y), (x + w, y + h), (255, 0, 0), 2)

    # Put the label above the box unless the box is too close to the top edge.
    ty = y - 10 if y >= 20 else y + h + 20
    # NOTE(review): Hershey fonts cannot render non-ASCII text; OpenCV draws
    # unsupported symbols as question marks, so the CJK labels will not be
    # legible here. Consider ASCII labels or a Pillow-based text renderer.
    cv2.putText(
        vis_img,
        f"Class: {final_class}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 255, 0),
        2,
    )

    return vis_img