443 lines
11 KiB
Python
443 lines
11 KiB
Python
|
|
import cv2
|
|||
|
|
import numpy as np
|
|||
|
|
|
|||
|
|
# ---------------------------
# Frozen configuration
# ---------------------------
# Maps the model's output index to the label reported for that class.
CLASS_NAMES = {
    0: "0",
    1: "1",
    2: "2",
    3: "3",
    4: "4",
}

# FIXED_ROI is (x, y, w, h) in the coordinates of the original-resolution
# image (after flipping), e.g. a 1920x1080 frame.
FIXED_ROI = (445, 540, 931, 319)

# Original resolution (the reference frame the ROI was annotated on).
ORIG_W = 1920
ORIG_H = 1080

# Current input image size (stated to be fixed at 640x640).
CUR_W = 640
CUR_H = 640

# Preprocessing settings handed to preprocess_image_for_rknn().
IMG_SIZE = 640
RESIZE_MODE = "stretch"
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"

# Default threshold and weights used by weighted_small_large().
WEIGHT_THRESHOLD = 0.4
W1, W2 = 0.3, 0.7
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------
# Basic utility functions
# ---------------------------
|
|||
|
|
def resize_stretch(image, size=IMG_SIZE):
    """Resize *image* to a ``size`` x ``size`` square, ignoring aspect ratio.

    Args:
        image: Image array accepted by ``cv2.resize``.
        size: Side length, in pixels, of the square output.

    Returns:
        The resized image produced by ``cv2.resize``.
    """
    return cv2.resize(image, (size, size))
|
|||
|
|
|
|||
|
|
def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """Turn an image into a batched float32 tensor for RKNN inference.

    Args:
        img: Input image; treated as BGR when ``to_rgb`` is true.
        size: Side length of the square the image is resized to.
        resize_mode: ``"letterbox"`` is not implemented; every other value
            stretches the image without preserving its aspect ratio.
        to_rgb: Convert BGR to RGB after resizing.
        normalize: Divide the pixel values by 255.
        layout: ``"NHWC"`` keeps channels last; every other value moves the
            channel axis first (NCHW).

    Returns:
        A float32 array with a leading batch axis of length 1.

    Raises:
        NotImplementedError: If ``resize_mode`` is ``"letterbox"``.
    """
    if resize_mode == "letterbox":
        raise NotImplementedError("letterbox resize mode is not implemented")

    img_box = resize_stretch(img, size=size)
    if to_rgb:
        img_box = cv2.cvtColor(img_box, cv2.COLOR_BGR2RGB)

    # astype() already yields a fresh float32 array, so no second cast is
    # needed before returning.
    img_f = img_box.astype(np.float32)
    if normalize:
        img_f /= 255.0

    if layout != "NHWC":
        # Channels-first layout: HWC -> CHW.
        img_f = np.transpose(img_f, (2, 0, 1))

    return np.expand_dims(img_f, axis=0)
|
|||
|
|
|
|||
|
|
def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """Choose between the small-pile and large-pile labels.

    The decision score is the weighted mean
    ``(w1 * p1 + w2 * p2) / (p1 + p2)`` with ``p1 = pred[1]`` and
    ``p2 = pred[2]``. When ``p1 + p2`` is not positive the score is 0.0.

    Args:
        pred: Indexable sequence of per-class scores with at least 3 entries.
        threshold: Minimum score for the large-pile label.
        w1: Weight applied to ``pred[1]``.
        w2: Weight applied to ``pred[2]``.

    Returns:
        ``(label, score, p1, p2)``; ``label`` is the large-pile string when
        ``score >= threshold`` and the small-pile string otherwise.
    """
    p1, p2 = float(pred[1]), float(pred[2])
    total = p1 + p2
    score = (w1 * p1 + w2 * p2) / total if total > 0 else 0.0
    label = "大堆料" if score >= threshold else "小堆料"
    return label, score, p1, p2
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------
# ROI scaling (core)
# ---------------------------
|
|||
|
|
def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """Rescale an ROI annotated at a reference resolution to ``img``'s size.

    Args:
        img: Array whose first two ``shape`` entries are (height, width).
        orig_roi: ``(x, y, w, h)`` in reference-resolution pixels.
        orig_w: Width of the frame the ROI was annotated on.
        orig_h: Height of the frame the ROI was annotated on.

    Returns:
        ``(sx, sy, sw, sh)`` as ints, truncated toward zero.

    Raises:
        ValueError: If ``orig_w`` or ``orig_h`` is not positive.
    """
    if orig_w <= 0 or orig_h <= 0:
        raise ValueError(
            f"reference resolution must be positive, got {orig_w}x{orig_h}"
        )

    x, y, w, h = orig_roi
    cur_h, cur_w = img.shape[:2]

    # Multiply before dividing: a pre-rounded ratio such as 10 / 490 makes
    # 49 * ratio come out just below 1.0, and int() would then drop a pixel.
    return (
        int(x * cur_w / orig_w),
        int(y * cur_h / orig_h),
        int(w * cur_w / orig_w),
        int(h * cur_h / orig_h),
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------
# Main inference entry point
# ---------------------------
|
|||
|
|
def myFunc(rknn_lite, IMG):
    """Classify the fixed ROI of a frame and return the annotated frame.

    FIXED_ROI is scaled from ORIG_W x ORIG_H to the size of ``IMG``, the
    frame is flipped vertically, the ROI is cropped from the flipped frame,
    preprocessed and passed to ``rknn_lite.inference``. The arg-max class is
    used as is, except that classes 1 and 2 are re-decided by
    ``weighted_small_large``. The ROI box and the class label are drawn on
    the flipped frame.

    Args:
        rknn_lite: Object exposing ``inference(inputs)``; presumably an
            RKNNLite runtime with a loaded model (confirm against caller).
        IMG: Input image array; treated as BGR when TO_RGB is set.

    Returns:
        The vertically flipped frame with the ROI rectangle and label drawn.

    Raises:
        ValueError: If ``IMG`` is None or empty, or the scaled ROI selects
            no pixels.
        RuntimeError: If inference returns no outputs.
    """
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")

    # 1) The ROI was annotated on an ORIG_W x ORIG_H frame, so it has to be
    #    rescaled to the actual input size first.
    x, y, w, h = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)

    # 2) The ROI coordinates refer to the vertically flipped frame.
    flipped_IMG = cv2.flip(IMG, 0)

    # 3) Crop the scaled ROI from the flipped frame.
    roi_img = flipped_IMG[y:y + h, x:x + w]
    if roi_img.size == 0:
        # Fail here with a clear message instead of inside cv2.resize.
        raise ValueError(
            f"ROI {(x, y, w, h)} selects no pixels in an image of shape "
            f"{IMG.shape[:2]}"
        )

    # 4) Preprocess the crop into the RKNN input tensor.
    input_tensor = preprocess_image_for_rknn(
        roi_img,
        size=IMG_SIZE,
        resize_mode=RESIZE_MODE,
        to_rgb=TO_RGB,
        normalize=NORMALIZE,
        layout=LAYOUT,
    )
    input_tensor = np.ascontiguousarray(input_tensor, dtype=np.float32)

    # 5) Inference.
    outputs = rknn_lite.inference([input_tensor])
    if outputs is None or len(outputs) == 0:
        # NOTE(review): the runtime is expected to return None on failure;
        # confirm against the RKNN Lite documentation.
        raise RuntimeError("RKNN inference returned no outputs")
    pred = outputs[0].reshape(-1).astype(float)
    class_id = int(np.argmax(pred))

    # Resolve the label. The Hershey fonts used by cv2.putText cannot draw
    # non-ASCII characters (they come out as '?'), so the overlay text uses
    # ASCII equivalents of the non-ASCII labels.
    if class_id in (1, 2):
        final_class, _, _, _ = weighted_small_large(pred)
        overlay_text = "large pile" if final_class == "大堆料" else "small pile"
    elif class_id in CLASS_NAMES:
        overlay_text = CLASS_NAMES[class_id]
    else:
        overlay_text = f"unknown({class_id})"

    # 6) Visualisation. cv2.flip already returned a new array, so drawing on
    #    it leaves the caller's IMG untouched.
    vis_img = flipped_IMG
    cv2.rectangle(vis_img, (x, y), (x + w, y + h), (255, 0, 0), 2)

    # Put the label above the box when there is room, otherwise below it.
    ty = y - 10 if y >= 20 else y + h + 20
    cv2.putText(
        vis_img,
        f"Class: {overlay_text}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 255, 0),
        2,
    )

    return vis_img
|
|||
|
|
import cv2
|
|||
|
|
import numpy as np
|
|||
|
|
|
|||
|
|
# ---------------------------
# Frozen configuration
# ---------------------------
# Maps the model's output index to the label reported for that class.
CLASS_NAMES = {
    0: "0",
    1: "1",
    2: "2",
    3: "3",
    4: "4",
}

# FIXED_ROI is (x, y, w, h) in the coordinates of the original-resolution
# image (after flipping), e.g. a 1920x1080 frame.
FIXED_ROI = (445, 540, 931, 319)

# Original resolution (the reference frame the ROI was annotated on).
ORIG_W = 1920
ORIG_H = 1080

# Current input image size (stated to be fixed at 640x640).
CUR_W = 640
CUR_H = 640

# Preprocessing settings handed to preprocess_image_for_rknn().
IMG_SIZE = 640
RESIZE_MODE = "stretch"
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"

# Default threshold and weights used by weighted_small_large().
WEIGHT_THRESHOLD = 0.4
W1, W2 = 0.3, 0.7
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------
# Basic utility functions
# ---------------------------
|
|||
|
|
def resize_stretch(image, size=IMG_SIZE):
    """Resize *image* to a ``size`` x ``size`` square, ignoring aspect ratio.

    Args:
        image: Image array accepted by ``cv2.resize``.
        size: Side length, in pixels, of the square output.

    Returns:
        The resized image produced by ``cv2.resize``.
    """
    return cv2.resize(image, (size, size))
|
|||
|
|
|
|||
|
|
def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """Turn an image into a batched float32 tensor for RKNN inference.

    Args:
        img: Input image; treated as BGR when ``to_rgb`` is true.
        size: Side length of the square the image is resized to.
        resize_mode: ``"letterbox"`` is not implemented; every other value
            stretches the image without preserving its aspect ratio.
        to_rgb: Convert BGR to RGB after resizing.
        normalize: Divide the pixel values by 255.
        layout: ``"NHWC"`` keeps channels last; every other value moves the
            channel axis first (NCHW).

    Returns:
        A float32 array with a leading batch axis of length 1.

    Raises:
        NotImplementedError: If ``resize_mode`` is ``"letterbox"``.
    """
    if resize_mode == "letterbox":
        raise NotImplementedError("letterbox resize mode is not implemented")

    img_box = resize_stretch(img, size=size)
    if to_rgb:
        img_box = cv2.cvtColor(img_box, cv2.COLOR_BGR2RGB)

    # astype() already yields a fresh float32 array, so no second cast is
    # needed before returning.
    img_f = img_box.astype(np.float32)
    if normalize:
        img_f /= 255.0

    if layout != "NHWC":
        # Channels-first layout: HWC -> CHW.
        img_f = np.transpose(img_f, (2, 0, 1))

    return np.expand_dims(img_f, axis=0)
|
|||
|
|
|
|||
|
|
def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """Choose between the small-pile and large-pile labels.

    The decision score is the weighted mean
    ``(w1 * p1 + w2 * p2) / (p1 + p2)`` with ``p1 = pred[1]`` and
    ``p2 = pred[2]``. When ``p1 + p2`` is not positive the score is 0.0.

    Args:
        pred: Indexable sequence of per-class scores with at least 3 entries.
        threshold: Minimum score for the large-pile label.
        w1: Weight applied to ``pred[1]``.
        w2: Weight applied to ``pred[2]``.

    Returns:
        ``(label, score, p1, p2)``; ``label`` is the large-pile string when
        ``score >= threshold`` and the small-pile string otherwise.
    """
    p1, p2 = float(pred[1]), float(pred[2])
    total = p1 + p2
    score = (w1 * p1 + w2 * p2) / total if total > 0 else 0.0
    label = "大堆料" if score >= threshold else "小堆料"
    return label, score, p1, p2
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------
# ROI scaling (core)
# ---------------------------
|
|||
|
|
def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """Rescale an ROI annotated at a reference resolution to ``img``'s size.

    Args:
        img: Array whose first two ``shape`` entries are (height, width).
        orig_roi: ``(x, y, w, h)`` in reference-resolution pixels.
        orig_w: Width of the frame the ROI was annotated on.
        orig_h: Height of the frame the ROI was annotated on.

    Returns:
        ``(sx, sy, sw, sh)`` as ints, truncated toward zero.

    Raises:
        ValueError: If ``orig_w`` or ``orig_h`` is not positive.
    """
    if orig_w <= 0 or orig_h <= 0:
        raise ValueError(
            f"reference resolution must be positive, got {orig_w}x{orig_h}"
        )

    x, y, w, h = orig_roi
    cur_h, cur_w = img.shape[:2]

    # Multiply before dividing: a pre-rounded ratio such as 10 / 490 makes
    # 49 * ratio come out just below 1.0, and int() would then drop a pixel.
    return (
        int(x * cur_w / orig_w),
        int(y * cur_h / orig_h),
        int(w * cur_w / orig_w),
        int(h * cur_h / orig_h),
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------
# Main inference entry point
# ---------------------------
|
|||
|
|
def myFunc(rknn_lite, IMG):
    """Classify the fixed ROI of a frame and return the annotated frame.

    FIXED_ROI is scaled from ORIG_W x ORIG_H to the size of ``IMG``, the
    frame is flipped vertically, the ROI is cropped from the flipped frame,
    preprocessed and passed to ``rknn_lite.inference``. The arg-max class is
    used as is, except that classes 1 and 2 are re-decided by
    ``weighted_small_large``. The ROI box and the class label are drawn on
    the flipped frame.

    Args:
        rknn_lite: Object exposing ``inference(inputs)``; presumably an
            RKNNLite runtime with a loaded model (confirm against caller).
        IMG: Input image array; treated as BGR when TO_RGB is set.

    Returns:
        The vertically flipped frame with the ROI rectangle and label drawn.

    Raises:
        ValueError: If ``IMG`` is None or empty, or the scaled ROI selects
            no pixels.
        RuntimeError: If inference returns no outputs.
    """
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")

    # 1) The ROI was annotated on an ORIG_W x ORIG_H frame, so it has to be
    #    rescaled to the actual input size first.
    x, y, w, h = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)

    # 2) The ROI coordinates refer to the vertically flipped frame.
    flipped_IMG = cv2.flip(IMG, 0)

    # 3) Crop the scaled ROI from the flipped frame.
    roi_img = flipped_IMG[y:y + h, x:x + w]
    if roi_img.size == 0:
        # Fail here with a clear message instead of inside cv2.resize.
        raise ValueError(
            f"ROI {(x, y, w, h)} selects no pixels in an image of shape "
            f"{IMG.shape[:2]}"
        )

    # 4) Preprocess the crop into the RKNN input tensor.
    input_tensor = preprocess_image_for_rknn(
        roi_img,
        size=IMG_SIZE,
        resize_mode=RESIZE_MODE,
        to_rgb=TO_RGB,
        normalize=NORMALIZE,
        layout=LAYOUT,
    )
    input_tensor = np.ascontiguousarray(input_tensor, dtype=np.float32)

    # 5) Inference.
    outputs = rknn_lite.inference([input_tensor])
    if outputs is None or len(outputs) == 0:
        # NOTE(review): the runtime is expected to return None on failure;
        # confirm against the RKNN Lite documentation.
        raise RuntimeError("RKNN inference returned no outputs")
    pred = outputs[0].reshape(-1).astype(float)
    class_id = int(np.argmax(pred))

    # Resolve the label. The Hershey fonts used by cv2.putText cannot draw
    # non-ASCII characters (they come out as '?'), so the overlay text uses
    # ASCII equivalents of the non-ASCII labels.
    if class_id in (1, 2):
        final_class, _, _, _ = weighted_small_large(pred)
        overlay_text = "large pile" if final_class == "大堆料" else "small pile"
    elif class_id in CLASS_NAMES:
        overlay_text = CLASS_NAMES[class_id]
    else:
        overlay_text = f"unknown({class_id})"

    # 6) Visualisation. cv2.flip already returned a new array, so drawing on
    #    it leaves the caller's IMG untouched.
    vis_img = flipped_IMG
    cv2.rectangle(vis_img, (x, y), (x + w, y + h), (255, 0, 0), 2)

    # Put the label above the box when there is room, otherwise below it.
    ty = y - 10 if y >= 20 else y + h + 20
    cv2.putText(
        vis_img,
        f"Class: {overlay_text}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 255, 0),
        2,
    )

    return vis_img
|
|||
|
|
import cv2
|
|||
|
|
import numpy as np
|
|||
|
|
|
|||
|
|
# ---------------------------
# Frozen configuration
# ---------------------------
# Maps the model's output index to the label reported for that class.
CLASS_NAMES = {
    0: "0",
    1: "1",
    2: "2",
    3: "3",
    4: "4",
}

# FIXED_ROI is (x, y, w, h) in the coordinates of the original-resolution
# image (after flipping), e.g. a 1920x1080 frame.
FIXED_ROI = (445, 540, 931, 319)

# Original resolution (the reference frame the ROI was annotated on).
ORIG_W = 1920
ORIG_H = 1080

# Current input image size (stated to be fixed at 640x640).
CUR_W = 640
CUR_H = 640

# Preprocessing settings handed to preprocess_image_for_rknn().
IMG_SIZE = 640
RESIZE_MODE = "stretch"
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"

# Default threshold and weights used by weighted_small_large().
WEIGHT_THRESHOLD = 0.4
W1, W2 = 0.3, 0.7
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------
# Basic utility functions
# ---------------------------
|
|||
|
|
def resize_stretch(image, size=IMG_SIZE):
    """Resize *image* to a ``size`` x ``size`` square, ignoring aspect ratio.

    Args:
        image: Image array accepted by ``cv2.resize``.
        size: Side length, in pixels, of the square output.

    Returns:
        The resized image produced by ``cv2.resize``.
    """
    return cv2.resize(image, (size, size))
|
|||
|
|
|
|||
|
|
def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """Turn an image into a batched float32 tensor for RKNN inference.

    Args:
        img: Input image; treated as BGR when ``to_rgb`` is true.
        size: Side length of the square the image is resized to.
        resize_mode: ``"letterbox"`` is not implemented; every other value
            stretches the image without preserving its aspect ratio.
        to_rgb: Convert BGR to RGB after resizing.
        normalize: Divide the pixel values by 255.
        layout: ``"NHWC"`` keeps channels last; every other value moves the
            channel axis first (NCHW).

    Returns:
        A float32 array with a leading batch axis of length 1.

    Raises:
        NotImplementedError: If ``resize_mode`` is ``"letterbox"``.
    """
    if resize_mode == "letterbox":
        raise NotImplementedError("letterbox resize mode is not implemented")

    img_box = resize_stretch(img, size=size)
    if to_rgb:
        img_box = cv2.cvtColor(img_box, cv2.COLOR_BGR2RGB)

    # astype() already yields a fresh float32 array, so no second cast is
    # needed before returning.
    img_f = img_box.astype(np.float32)
    if normalize:
        img_f /= 255.0

    if layout != "NHWC":
        # Channels-first layout: HWC -> CHW.
        img_f = np.transpose(img_f, (2, 0, 1))

    return np.expand_dims(img_f, axis=0)
|
|||
|
|
|
|||
|
|
def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """Choose between the small-pile and large-pile labels.

    The decision score is the weighted mean
    ``(w1 * p1 + w2 * p2) / (p1 + p2)`` with ``p1 = pred[1]`` and
    ``p2 = pred[2]``. When ``p1 + p2`` is not positive the score is 0.0.

    Args:
        pred: Indexable sequence of per-class scores with at least 3 entries.
        threshold: Minimum score for the large-pile label.
        w1: Weight applied to ``pred[1]``.
        w2: Weight applied to ``pred[2]``.

    Returns:
        ``(label, score, p1, p2)``; ``label`` is the large-pile string when
        ``score >= threshold`` and the small-pile string otherwise.
    """
    p1, p2 = float(pred[1]), float(pred[2])
    total = p1 + p2
    score = (w1 * p1 + w2 * p2) / total if total > 0 else 0.0
    label = "大堆料" if score >= threshold else "小堆料"
    return label, score, p1, p2
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------
# ROI scaling (core)
# ---------------------------
|
|||
|
|
def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """Rescale an ROI annotated at a reference resolution to ``img``'s size.

    Args:
        img: Array whose first two ``shape`` entries are (height, width).
        orig_roi: ``(x, y, w, h)`` in reference-resolution pixels.
        orig_w: Width of the frame the ROI was annotated on.
        orig_h: Height of the frame the ROI was annotated on.

    Returns:
        ``(sx, sy, sw, sh)`` as ints, truncated toward zero.

    Raises:
        ValueError: If ``orig_w`` or ``orig_h`` is not positive.
    """
    if orig_w <= 0 or orig_h <= 0:
        raise ValueError(
            f"reference resolution must be positive, got {orig_w}x{orig_h}"
        )

    x, y, w, h = orig_roi
    cur_h, cur_w = img.shape[:2]

    # Multiply before dividing: a pre-rounded ratio such as 10 / 490 makes
    # 49 * ratio come out just below 1.0, and int() would then drop a pixel.
    return (
        int(x * cur_w / orig_w),
        int(y * cur_h / orig_h),
        int(w * cur_w / orig_w),
        int(h * cur_h / orig_h),
    )
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ---------------------------
# Main inference entry point
# ---------------------------
|
|||
|
|
def myFunc(rknn_lite, IMG):
    """Classify the fixed ROI of a frame and return the annotated frame.

    FIXED_ROI is scaled from ORIG_W x ORIG_H to the size of ``IMG``, the
    frame is flipped vertically, the ROI is cropped from the flipped frame,
    preprocessed and passed to ``rknn_lite.inference``. The arg-max class is
    used as is, except that classes 1 and 2 are re-decided by
    ``weighted_small_large``. The ROI box and the class label are drawn on
    the flipped frame.

    Args:
        rknn_lite: Object exposing ``inference(inputs)``; presumably an
            RKNNLite runtime with a loaded model (confirm against caller).
        IMG: Input image array; treated as BGR when TO_RGB is set.

    Returns:
        The vertically flipped frame with the ROI rectangle and label drawn.

    Raises:
        ValueError: If ``IMG`` is None or empty, or the scaled ROI selects
            no pixels.
        RuntimeError: If inference returns no outputs.
    """
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")

    # 1) The ROI was annotated on an ORIG_W x ORIG_H frame, so it has to be
    #    rescaled to the actual input size first.
    x, y, w, h = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)

    # 2) The ROI coordinates refer to the vertically flipped frame.
    flipped_IMG = cv2.flip(IMG, 0)

    # 3) Crop the scaled ROI from the flipped frame.
    roi_img = flipped_IMG[y:y + h, x:x + w]
    if roi_img.size == 0:
        # Fail here with a clear message instead of inside cv2.resize.
        raise ValueError(
            f"ROI {(x, y, w, h)} selects no pixels in an image of shape "
            f"{IMG.shape[:2]}"
        )

    # 4) Preprocess the crop into the RKNN input tensor.
    input_tensor = preprocess_image_for_rknn(
        roi_img,
        size=IMG_SIZE,
        resize_mode=RESIZE_MODE,
        to_rgb=TO_RGB,
        normalize=NORMALIZE,
        layout=LAYOUT,
    )
    input_tensor = np.ascontiguousarray(input_tensor, dtype=np.float32)

    # 5) Inference.
    outputs = rknn_lite.inference([input_tensor])
    if outputs is None or len(outputs) == 0:
        # NOTE(review): the runtime is expected to return None on failure;
        # confirm against the RKNN Lite documentation.
        raise RuntimeError("RKNN inference returned no outputs")
    pred = outputs[0].reshape(-1).astype(float)
    class_id = int(np.argmax(pred))

    # Resolve the label. The Hershey fonts used by cv2.putText cannot draw
    # non-ASCII characters (they come out as '?'), so the overlay text uses
    # ASCII equivalents of the non-ASCII labels.
    if class_id in (1, 2):
        final_class, _, _, _ = weighted_small_large(pred)
        overlay_text = "large pile" if final_class == "大堆料" else "small pile"
    elif class_id in CLASS_NAMES:
        overlay_text = CLASS_NAMES[class_id]
    else:
        overlay_text = f"unknown({class_id})"

    # 6) Visualisation. cv2.flip already returned a new array, so drawing on
    #    it leaves the caller's IMG untouched.
    vis_img = flipped_IMG
    cv2.rectangle(vis_img, (x, y), (x + w, y + h), (255, 0, 0), 2)

    # Put the label above the box when there is room, otherwise below it.
    ty = y - 10 if y >= 20 else y + h + 20
    cv2.putText(
        vis_img,
        f"Class: {overlay_text}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 255, 0),
        2,
    )

    return vis_img
|
|||
|
|
|