Files

443 lines
11 KiB
Python
Raw Permalink Normal View History

2025-12-16 15:12:02 +08:00
import cv2
import numpy as np
# ---------------------------
# 固化配置
# ---------------------------
# Display label for each class index produced by the classifier.
CLASS_NAMES = {
    0: "0",
    1: "1",
    2: "2",
    3: "3",
    4: "4",
}

# Fixed region of interest as (x, y, w, h). The coordinates refer to the
# flipped image at the reference resolution below (e.g. 1920x1080).
FIXED_ROI = (445, 540, 931, 319)

# Reference resolution that the ROI annotation is based on.
ORIG_W, ORIG_H = 1920, 1080

# Size of the incoming frames (stated to be fixed at 640x640).
CUR_W, CUR_H = 640, 640

# Preprocessing settings handed to preprocess_image_for_rknn.
IMG_SIZE = 640
RESIZE_MODE = "stretch"
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"

# Decision threshold and per-class weights used by weighted_small_large.
WEIGHT_THRESHOLD = 0.4
W1 = 0.3
W2 = 0.7
# ---------------------------
# 基本工具函数
# ---------------------------
def resize_stretch(image, size=IMG_SIZE):
    """Resize ``image`` to a ``size`` x ``size`` square.

    Both output dimensions are set to ``size``, so the aspect ratio of a
    non-square input is not preserved. Interpolation is left at the
    ``cv2.resize`` default.
    """
    target_shape = (size, size)
    stretched = cv2.resize(image, target_shape)
    return stretched
def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """Convert an image into a batched float32 array for inference.

    Args:
        img: Image array; treated as BGR when ``to_rgb`` is set.
        size: Edge length of the square the image is resized to.
        resize_mode: ``"letterbox"`` is not implemented; any other value
            stretches the image to the square.
        to_rgb: When true, swap the channel order from BGR to RGB.
        normalize: When true, divide pixel values by 255.
        layout: ``"NHWC"`` keeps channels last; any other value moves the
            channel axis in front of height and width.

    Returns:
        A float32 array with a leading batch axis of length 1.

    Raises:
        NotImplementedError: If ``resize_mode`` is ``"letterbox"``.
    """
    if resize_mode == "letterbox":
        raise NotImplementedError

    resized = resize_stretch(img, size=size)
    if to_rgb:
        resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    pixels = resized.astype(np.float32)
    if normalize:
        pixels /= 255.0

    if layout != "NHWC":
        # Channels-first: (H, W, C) -> (C, H, W).
        pixels = np.transpose(pixels, (2, 0, 1))

    batched = np.expand_dims(pixels, axis=0)
    return batched.astype(np.float32)
def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """Choose between the two pile labels from the scores at index 1 and 2.

    The score is the weighted mean ``(w1*p1 + w2*p2) / (p1 + p2)``; it falls
    back to 0.0 when ``p1 + p2`` is not positive.

    Args:
        pred: Indexable collection of class scores with at least 3 entries.
            NOTE(review): the formula presumably expects non-negative
            scores (e.g. probabilities) - confirm against the model output.
        threshold: Score at or above which the label is "大堆料".
        w1: Weight applied to ``pred[1]``.
        w2: Weight applied to ``pred[2]``.

    Returns:
        Tuple ``(label, score, p1, p2)``.
    """
    score_1 = float(pred[1])
    score_2 = float(pred[2])
    combined = score_1 + score_2

    if combined > 0:
        blended = (w1 * score_1 + w2 * score_2) / combined
    else:
        blended = 0.0

    if blended >= threshold:
        label = "大堆料"
    else:
        label = "小堆料"
    return label, blended, score_1, score_2
# ---------------------------
# ROI 缩放函数(核心)
# ---------------------------
def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """Scale an ROI annotated at a reference resolution to the size of ``img``.

    Args:
        img: Array whose ``shape`` starts with ``(height, width)``.
        orig_roi: ``(x, y, w, h)`` expressed in reference-resolution pixels.
        orig_w: Width of the reference resolution.
        orig_h: Height of the reference resolution.

    Returns:
        Tuple ``(sx, sy, sw, sh)`` of ints, each truncated toward zero.

    Raises:
        ZeroDivisionError: If ``orig_w`` or ``orig_h`` is zero.
    """
    x, y, w, h = orig_roi
    cur_h, cur_w = img.shape[:2]

    # Multiply before dividing. Scaling by a pre-computed float ratio can
    # land just below an exact integer (100 * (29 / 100) is
    # 28.999999999999996), and int() would then drop a whole pixel.
    sx = int(x * cur_w / orig_w)
    sy = int(y * cur_h / orig_h)
    sw = int(w * cur_w / orig_w)
    sh = int(h * cur_h / orig_h)
    return sx, sy, sw, sh
# ---------------------------
# 主推理接口
# ---------------------------
def myFunc(rknn_lite, IMG):
    """Classify the fixed ROI of a frame and return an annotated copy.

    The frame is flipped vertically, the rescaled ``FIXED_ROI`` is cropped
    from the flipped frame and preprocessed, and the result is passed to
    ``rknn_lite.inference``. The ROI rectangle and the predicted label are
    drawn on a copy of the flipped frame.

    Args:
        rknn_lite: Object exposing ``inference(inputs)``; the first returned
            output is flattened and read as per-class scores.
            NOTE(review): presumably an RKNN Lite runtime - confirm.
        IMG: Input image array (BGR channel order is assumed by the
            preprocessing step when ``TO_RGB`` is set).

    Returns:
        A copy of the flipped image with the ROI box and class label drawn.

    Raises:
        ValueError: If ``IMG`` is ``None`` or empty.
    """
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")

    # 1) FIXED_ROI is annotated at ORIG_W x ORIG_H, so rescale it to the
    #    actual size of the incoming frame first.
    x, y, w, h = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)

    # 2) The ROI coordinates refer to the vertically flipped frame.
    flipped_IMG = cv2.flip(IMG, 0)

    # 3) Crop the rescaled ROI from the flipped frame.
    roi_img = flipped_IMG[y:y + h, x:x + w]

    # 4) Preprocess the crop into the model's input layout.
    input_tensor = preprocess_image_for_rknn(
        roi_img,
        size=IMG_SIZE,
        resize_mode=RESIZE_MODE,
        to_rgb=TO_RGB,
        normalize=NORMALIZE,
        layout=LAYOUT,
    )
    # One conversion only: this copies just when the tensor is not already
    # contiguous float32, instead of always copying via astype first.
    input_tensor = np.ascontiguousarray(input_tensor, dtype=np.float32)

    # 5) Inference: flatten the first output and pick the top class.
    outputs = rknn_lite.inference([input_tensor])
    pred = outputs[0].reshape(-1).astype(float)
    class_id = int(np.argmax(pred))

    # cv2.putText uses Hershey fonts, which cannot draw non-ASCII text
    # (such characters show up as question marks), so the overlay uses
    # ASCII labels in place of the Chinese ones.
    ascii_labels = {"大堆料": "large pile", "小堆料": "small pile"}
    if class_id in (1, 2):
        # Classes 1 and 2 are resolved by the weighted score rather than
        # by the raw argmax.
        pile_label, _, _, _ = weighted_small_large(pred)
        final_class = ascii_labels.get(pile_label, pile_label)
    elif class_id in CLASS_NAMES:
        final_class = CLASS_NAMES[class_id]
    else:
        final_class = f"unknown({class_id})"

    # 6) Visualisation on a copy of the flipped frame.
    vis_img = flipped_IMG.copy()
    cv2.rectangle(vis_img, (x, y), (x + w, y + h), (255, 0, 0), 2)
    # Put the text above the box when there is room, otherwise below it.
    ty = y - 10 if y >= 20 else y + h + 20
    cv2.putText(
        vis_img,
        f"Class: {final_class}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 255, 0),
        2,
    )
    return vis_img
import cv2
import numpy as np
# ---------------------------
# 固化配置
# ---------------------------
# Display label for each class index produced by the classifier.
CLASS_NAMES = {
    0: "0",
    1: "1",
    2: "2",
    3: "3",
    4: "4",
}

# Fixed region of interest as (x, y, w, h). The coordinates refer to the
# flipped image at the reference resolution below (e.g. 1920x1080).
FIXED_ROI = (445, 540, 931, 319)

# Reference resolution that the ROI annotation is based on.
ORIG_W, ORIG_H = 1920, 1080

# Size of the incoming frames (stated to be fixed at 640x640).
CUR_W, CUR_H = 640, 640

# Preprocessing settings handed to preprocess_image_for_rknn.
IMG_SIZE = 640
RESIZE_MODE = "stretch"
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"

# Decision threshold and per-class weights used by weighted_small_large.
WEIGHT_THRESHOLD = 0.4
W1 = 0.3
W2 = 0.7
# ---------------------------
# 基本工具函数
# ---------------------------
def resize_stretch(image, size=IMG_SIZE):
    """Resize ``image`` to a ``size`` x ``size`` square.

    Both output dimensions are set to ``size``, so the aspect ratio of a
    non-square input is not preserved. Interpolation is left at the
    ``cv2.resize`` default.
    """
    target_shape = (size, size)
    stretched = cv2.resize(image, target_shape)
    return stretched
def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """Convert an image into a batched float32 array for inference.

    Args:
        img: Image array; treated as BGR when ``to_rgb`` is set.
        size: Edge length of the square the image is resized to.
        resize_mode: ``"letterbox"`` is not implemented; any other value
            stretches the image to the square.
        to_rgb: When true, swap the channel order from BGR to RGB.
        normalize: When true, divide pixel values by 255.
        layout: ``"NHWC"`` keeps channels last; any other value moves the
            channel axis in front of height and width.

    Returns:
        A float32 array with a leading batch axis of length 1.

    Raises:
        NotImplementedError: If ``resize_mode`` is ``"letterbox"``.
    """
    if resize_mode == "letterbox":
        raise NotImplementedError

    resized = resize_stretch(img, size=size)
    if to_rgb:
        resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    pixels = resized.astype(np.float32)
    if normalize:
        pixels /= 255.0

    if layout != "NHWC":
        # Channels-first: (H, W, C) -> (C, H, W).
        pixels = np.transpose(pixels, (2, 0, 1))

    batched = np.expand_dims(pixels, axis=0)
    return batched.astype(np.float32)
def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """Choose between the two pile labels from the scores at index 1 and 2.

    The score is the weighted mean ``(w1*p1 + w2*p2) / (p1 + p2)``; it falls
    back to 0.0 when ``p1 + p2`` is not positive.

    Args:
        pred: Indexable collection of class scores with at least 3 entries.
            NOTE(review): the formula presumably expects non-negative
            scores (e.g. probabilities) - confirm against the model output.
        threshold: Score at or above which the label is "大堆料".
        w1: Weight applied to ``pred[1]``.
        w2: Weight applied to ``pred[2]``.

    Returns:
        Tuple ``(label, score, p1, p2)``.
    """
    score_1 = float(pred[1])
    score_2 = float(pred[2])
    combined = score_1 + score_2

    if combined > 0:
        blended = (w1 * score_1 + w2 * score_2) / combined
    else:
        blended = 0.0

    if blended >= threshold:
        label = "大堆料"
    else:
        label = "小堆料"
    return label, blended, score_1, score_2
# ---------------------------
# ROI 缩放函数(核心)
# ---------------------------
def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """Scale an ROI annotated at a reference resolution to the size of ``img``.

    Args:
        img: Array whose ``shape`` starts with ``(height, width)``.
        orig_roi: ``(x, y, w, h)`` expressed in reference-resolution pixels.
        orig_w: Width of the reference resolution.
        orig_h: Height of the reference resolution.

    Returns:
        Tuple ``(sx, sy, sw, sh)`` of ints, each truncated toward zero.

    Raises:
        ZeroDivisionError: If ``orig_w`` or ``orig_h`` is zero.
    """
    x, y, w, h = orig_roi
    cur_h, cur_w = img.shape[:2]

    # Multiply before dividing. Scaling by a pre-computed float ratio can
    # land just below an exact integer (100 * (29 / 100) is
    # 28.999999999999996), and int() would then drop a whole pixel.
    sx = int(x * cur_w / orig_w)
    sy = int(y * cur_h / orig_h)
    sw = int(w * cur_w / orig_w)
    sh = int(h * cur_h / orig_h)
    return sx, sy, sw, sh
# ---------------------------
# 主推理接口
# ---------------------------
def myFunc(rknn_lite, IMG):
    """Classify the fixed ROI of a frame and return an annotated copy.

    The frame is flipped vertically, the rescaled ``FIXED_ROI`` is cropped
    from the flipped frame and preprocessed, and the result is passed to
    ``rknn_lite.inference``. The ROI rectangle and the predicted label are
    drawn on a copy of the flipped frame.

    Args:
        rknn_lite: Object exposing ``inference(inputs)``; the first returned
            output is flattened and read as per-class scores.
            NOTE(review): presumably an RKNN Lite runtime - confirm.
        IMG: Input image array (BGR channel order is assumed by the
            preprocessing step when ``TO_RGB`` is set).

    Returns:
        A copy of the flipped image with the ROI box and class label drawn.

    Raises:
        ValueError: If ``IMG`` is ``None`` or empty.
    """
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")

    # 1) FIXED_ROI is annotated at ORIG_W x ORIG_H, so rescale it to the
    #    actual size of the incoming frame first.
    x, y, w, h = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)

    # 2) The ROI coordinates refer to the vertically flipped frame.
    flipped_IMG = cv2.flip(IMG, 0)

    # 3) Crop the rescaled ROI from the flipped frame.
    roi_img = flipped_IMG[y:y + h, x:x + w]

    # 4) Preprocess the crop into the model's input layout.
    input_tensor = preprocess_image_for_rknn(
        roi_img,
        size=IMG_SIZE,
        resize_mode=RESIZE_MODE,
        to_rgb=TO_RGB,
        normalize=NORMALIZE,
        layout=LAYOUT,
    )
    # One conversion only: this copies just when the tensor is not already
    # contiguous float32, instead of always copying via astype first.
    input_tensor = np.ascontiguousarray(input_tensor, dtype=np.float32)

    # 5) Inference: flatten the first output and pick the top class.
    outputs = rknn_lite.inference([input_tensor])
    pred = outputs[0].reshape(-1).astype(float)
    class_id = int(np.argmax(pred))

    # cv2.putText uses Hershey fonts, which cannot draw non-ASCII text
    # (such characters show up as question marks), so the overlay uses
    # ASCII labels in place of the Chinese ones.
    ascii_labels = {"大堆料": "large pile", "小堆料": "small pile"}
    if class_id in (1, 2):
        # Classes 1 and 2 are resolved by the weighted score rather than
        # by the raw argmax.
        pile_label, _, _, _ = weighted_small_large(pred)
        final_class = ascii_labels.get(pile_label, pile_label)
    elif class_id in CLASS_NAMES:
        final_class = CLASS_NAMES[class_id]
    else:
        final_class = f"unknown({class_id})"

    # 6) Visualisation on a copy of the flipped frame.
    vis_img = flipped_IMG.copy()
    cv2.rectangle(vis_img, (x, y), (x + w, y + h), (255, 0, 0), 2)
    # Put the text above the box when there is room, otherwise below it.
    ty = y - 10 if y >= 20 else y + h + 20
    cv2.putText(
        vis_img,
        f"Class: {final_class}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 255, 0),
        2,
    )
    return vis_img
import cv2
import numpy as np
# ---------------------------
# 固化配置
# ---------------------------
# Display label for each class index produced by the classifier.
CLASS_NAMES = {
    0: "0",
    1: "1",
    2: "2",
    3: "3",
    4: "4",
}

# Fixed region of interest as (x, y, w, h). The coordinates refer to the
# flipped image at the reference resolution below (e.g. 1920x1080).
FIXED_ROI = (445, 540, 931, 319)

# Reference resolution that the ROI annotation is based on.
ORIG_W, ORIG_H = 1920, 1080

# Size of the incoming frames (stated to be fixed at 640x640).
CUR_W, CUR_H = 640, 640

# Preprocessing settings handed to preprocess_image_for_rknn.
IMG_SIZE = 640
RESIZE_MODE = "stretch"
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"

# Decision threshold and per-class weights used by weighted_small_large.
WEIGHT_THRESHOLD = 0.4
W1 = 0.3
W2 = 0.7
# ---------------------------
# 基本工具函数
# ---------------------------
def resize_stretch(image, size=IMG_SIZE):
    """Resize ``image`` to a ``size`` x ``size`` square.

    Both output dimensions are set to ``size``, so the aspect ratio of a
    non-square input is not preserved. Interpolation is left at the
    ``cv2.resize`` default.
    """
    target_shape = (size, size)
    stretched = cv2.resize(image, target_shape)
    return stretched
def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """Convert an image into a batched float32 array for inference.

    Args:
        img: Image array; treated as BGR when ``to_rgb`` is set.
        size: Edge length of the square the image is resized to.
        resize_mode: ``"letterbox"`` is not implemented; any other value
            stretches the image to the square.
        to_rgb: When true, swap the channel order from BGR to RGB.
        normalize: When true, divide pixel values by 255.
        layout: ``"NHWC"`` keeps channels last; any other value moves the
            channel axis in front of height and width.

    Returns:
        A float32 array with a leading batch axis of length 1.

    Raises:
        NotImplementedError: If ``resize_mode`` is ``"letterbox"``.
    """
    if resize_mode == "letterbox":
        raise NotImplementedError

    resized = resize_stretch(img, size=size)
    if to_rgb:
        resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    pixels = resized.astype(np.float32)
    if normalize:
        pixels /= 255.0

    if layout != "NHWC":
        # Channels-first: (H, W, C) -> (C, H, W).
        pixels = np.transpose(pixels, (2, 0, 1))

    batched = np.expand_dims(pixels, axis=0)
    return batched.astype(np.float32)
def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """Choose between the two pile labels from the scores at index 1 and 2.

    The score is the weighted mean ``(w1*p1 + w2*p2) / (p1 + p2)``; it falls
    back to 0.0 when ``p1 + p2`` is not positive.

    Args:
        pred: Indexable collection of class scores with at least 3 entries.
            NOTE(review): the formula presumably expects non-negative
            scores (e.g. probabilities) - confirm against the model output.
        threshold: Score at or above which the label is "大堆料".
        w1: Weight applied to ``pred[1]``.
        w2: Weight applied to ``pred[2]``.

    Returns:
        Tuple ``(label, score, p1, p2)``.
    """
    score_1 = float(pred[1])
    score_2 = float(pred[2])
    combined = score_1 + score_2

    if combined > 0:
        blended = (w1 * score_1 + w2 * score_2) / combined
    else:
        blended = 0.0

    if blended >= threshold:
        label = "大堆料"
    else:
        label = "小堆料"
    return label, blended, score_1, score_2
# ---------------------------
# ROI 缩放函数(核心)
# ---------------------------
def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """Scale an ROI annotated at a reference resolution to the size of ``img``.

    Args:
        img: Array whose ``shape`` starts with ``(height, width)``.
        orig_roi: ``(x, y, w, h)`` expressed in reference-resolution pixels.
        orig_w: Width of the reference resolution.
        orig_h: Height of the reference resolution.

    Returns:
        Tuple ``(sx, sy, sw, sh)`` of ints, each truncated toward zero.

    Raises:
        ZeroDivisionError: If ``orig_w`` or ``orig_h`` is zero.
    """
    x, y, w, h = orig_roi
    cur_h, cur_w = img.shape[:2]

    # Multiply before dividing. Scaling by a pre-computed float ratio can
    # land just below an exact integer (100 * (29 / 100) is
    # 28.999999999999996), and int() would then drop a whole pixel.
    sx = int(x * cur_w / orig_w)
    sy = int(y * cur_h / orig_h)
    sw = int(w * cur_w / orig_w)
    sh = int(h * cur_h / orig_h)
    return sx, sy, sw, sh
# ---------------------------
# 主推理接口
# ---------------------------
def myFunc(rknn_lite, IMG):
    """Classify the fixed ROI of a frame and return an annotated copy.

    The frame is flipped vertically, the rescaled ``FIXED_ROI`` is cropped
    from the flipped frame and preprocessed, and the result is passed to
    ``rknn_lite.inference``. The ROI rectangle and the predicted label are
    drawn on a copy of the flipped frame.

    Args:
        rknn_lite: Object exposing ``inference(inputs)``; the first returned
            output is flattened and read as per-class scores.
            NOTE(review): presumably an RKNN Lite runtime - confirm.
        IMG: Input image array (BGR channel order is assumed by the
            preprocessing step when ``TO_RGB`` is set).

    Returns:
        A copy of the flipped image with the ROI box and class label drawn.

    Raises:
        ValueError: If ``IMG`` is ``None`` or empty.
    """
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")

    # 1) FIXED_ROI is annotated at ORIG_W x ORIG_H, so rescale it to the
    #    actual size of the incoming frame first.
    x, y, w, h = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)

    # 2) The ROI coordinates refer to the vertically flipped frame.
    flipped_IMG = cv2.flip(IMG, 0)

    # 3) Crop the rescaled ROI from the flipped frame.
    roi_img = flipped_IMG[y:y + h, x:x + w]

    # 4) Preprocess the crop into the model's input layout.
    input_tensor = preprocess_image_for_rknn(
        roi_img,
        size=IMG_SIZE,
        resize_mode=RESIZE_MODE,
        to_rgb=TO_RGB,
        normalize=NORMALIZE,
        layout=LAYOUT,
    )
    # One conversion only: this copies just when the tensor is not already
    # contiguous float32, instead of always copying via astype first.
    input_tensor = np.ascontiguousarray(input_tensor, dtype=np.float32)

    # 5) Inference: flatten the first output and pick the top class.
    outputs = rknn_lite.inference([input_tensor])
    pred = outputs[0].reshape(-1).astype(float)
    class_id = int(np.argmax(pred))

    # cv2.putText uses Hershey fonts, which cannot draw non-ASCII text
    # (such characters show up as question marks), so the overlay uses
    # ASCII labels in place of the Chinese ones.
    ascii_labels = {"大堆料": "large pile", "小堆料": "small pile"}
    if class_id in (1, 2):
        # Classes 1 and 2 are resolved by the weighted score rather than
        # by the raw argmax.
        pile_label, _, _, _ = weighted_small_large(pred)
        final_class = ascii_labels.get(pile_label, pile_label)
    elif class_id in CLASS_NAMES:
        final_class = CLASS_NAMES[class_id]
    else:
        final_class = f"unknown({class_id})"

    # 6) Visualisation on a copy of the flipped frame.
    vis_img = flipped_IMG.copy()
    cv2.rectangle(vis_img, (x, y), (x + w, y + h), (255, 0, 0), 2)
    # Put the text above the box when there is room, otherwise below it.
    ty = y - 10 if y >= 20 else y + h + 20
    cv2.putText(
        vis_img,
        f"Class: {final_class}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 255, 0),
        2,
    )
    return vis_img