Files
琉璃月光 8506c3af79 first commit
2025-12-16 15:12:02 +08:00

443 lines
11 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import cv2
import numpy as np
# ---------------------------
# Frozen configuration
# ---------------------------
# Class index -> label; every label is simply the stringified index.
CLASS_NAMES = {index: str(index) for index in range(5)}

# FIXED_ROI is (x, y, w, h) in the coordinates of the original-resolution
# image (after flipping), e.g. a 1920x1080 frame.
FIXED_ROI = (445, 540, 931, 319)

# Original resolution (the baseline the ROI annotation refers to).
ORIG_W, ORIG_H = 1920, 1080

# Current input image size (stated to be fixed at 640x640).
CUR_W = CUR_H = 640

# Preprocessing settings for the model input.
IMG_SIZE = 640
RESIZE_MODE = "stretch"
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"

# Threshold and per-class weights for the class-1 / class-2 weighted score.
WEIGHT_THRESHOLD = 0.4
W1 = 0.3
W2 = 0.7
# ---------------------------
# 基本工具函数
# ---------------------------
def resize_stretch(image, size=IMG_SIZE):
    """Resize ``image`` to a ``size`` x ``size`` square, ignoring aspect ratio.

    Args:
        image: Image array accepted by ``cv2.resize``.
        size: Edge length, in pixels, of the square output.

    Returns:
        The resized image returned by ``cv2.resize``.
    """
    target_dims = (size, size)
    return cv2.resize(image, target_dims)
def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """Turn an image into a batched float32 tensor for RKNN inference.

    Args:
        img: Source image; converted with ``COLOR_BGR2RGB`` when ``to_rgb``
            is truthy, so it is presumably BGR.
        size: Edge length of the square the image is resized to.
        resize_mode: ``"letterbox"`` is not implemented; any other value
            uses the plain stretch resize.
        to_rgb: Apply the BGR -> RGB conversion after resizing when truthy.
        normalize: Divide pixel values by 255.0 when truthy.
        layout: ``"NHWC"`` keeps the axis order; any other value applies
            ``transpose((2, 0, 1))`` before batching.

    Returns:
        A float32 array with a leading batch axis of length 1.

    Raises:
        NotImplementedError: If ``resize_mode`` is ``"letterbox"``.
    """
    if resize_mode == "letterbox":
        raise NotImplementedError

    resized = resize_stretch(img, size=size)
    if to_rgb:
        resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    pixels = resized.astype(np.float32)
    if normalize:
        pixels /= 255.0

    if layout != "NHWC":
        pixels = np.transpose(pixels, (2, 0, 1))
    batched = np.expand_dims(pixels, axis=0)
    return batched.astype(np.float32)
def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """Choose between the two pile labels from the scores at index 1 and 2.

    The score is the weighted mean ``(w1*p1 + w2*p2) / (p1 + p2)``; it is
    0.0 when ``p1 + p2`` is not positive.

    Args:
        pred: Indexable score vector; entries 1 and 2 are read.
        threshold: Minimum score for the "大堆料" label.
        w1: Weight applied to ``pred[1]``.
        w2: Weight applied to ``pred[2]``.

    Returns:
        Tuple ``(label, score, p1, p2)`` where ``label`` is "大堆料" when
        ``score >= threshold`` and "小堆料" otherwise.
    """
    p1 = float(pred[1])
    p2 = float(pred[2])
    total = p1 + p2
    if total > 0:
        score = (w1 * p1 + w2 * p2) / total
    else:
        score = 0.0
    label = "大堆料" if score >= threshold else "小堆料"
    return label, score, p1, p2
# ---------------------------
# ROI 缩放函数(核心)
# ---------------------------
def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """Scale an ROI given at the original resolution to the size of ``img``.

    Each axis is scaled by the ratio between the current image size and
    the original size and truncated to an integer. The result is then
    clipped to the image so that slicing with it cannot wrap around (a
    negative start index) or yield an empty crop.

    Args:
        img: Array whose first two ``shape`` entries are (height, width).
        orig_roi: ``(x, y, w, h)`` in original-resolution pixels.
        orig_w: Width of the original resolution.
        orig_h: Height of the original resolution.

    Returns:
        ``(x, y, w, h)`` as ints in the coordinates of ``img``. For a
        non-empty image the ROI lies inside the image and is at least
        1x1; an ROI that was already inside is returned unchanged.
    """
    x, y, w, h = orig_roi
    # Size of the current image.
    cur_h, cur_w = img.shape[:2]
    scale_x = cur_w / orig_w
    scale_y = cur_h / orig_h

    sx = int(x * scale_x)
    sy = int(y * scale_y)
    sw = int(w * scale_x)
    sh = int(h * scale_y)

    # Clip the top-left corner into the image, then clip the bottom-right
    # corner while keeping at least one pixel in each direction.
    left = min(max(sx, 0), max(cur_w - 1, 0))
    top = min(max(sy, 0), max(cur_h - 1, 0))
    right = min(max(sx + sw, left + 1), cur_w)
    bottom = min(max(sy + sh, top + 1), cur_h)
    return left, top, right - left, bottom - top
# ---------------------------
# 主推理接口
# ---------------------------
def myFunc(rknn_lite, IMG):
    """Classify the fixed ROI of ``IMG`` and return an annotated image.

    The ROI is rescaled from the annotation resolution to the size of
    ``IMG``, the image is flipped vertically, the ROI is cropped from the
    flipped image, preprocessed and passed to ``rknn_lite.inference``.
    The arg-max class (refined by the weighted score for classes 1 and 2)
    is drawn next to the ROI rectangle.

    Args:
        rknn_lite: Object exposing ``inference(list_of_arrays)``; the first
            element of its result is used as the score vector.
        IMG: Input image array (presumably BGR, since preprocessing
            converts BGR -> RGB — confirm with the caller).

    Returns:
        A copy of the vertically flipped image with the ROI rectangle and
        an ASCII class label drawn on it.

    Raises:
        ValueError: If ``IMG`` is ``None``/empty or the ROI crop is empty.
        RuntimeError: If inference returns no output.
    """
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")

    # 1) The ROI was annotated at ORIG_W x ORIG_H, so scale it to IMG first.
    x, y, w, h = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)

    # 2) The ROI refers to the vertically flipped frame (flip code 0).
    flipped_IMG = cv2.flip(IMG, 0)

    # 3) Crop the scaled ROI from the flipped image.
    roi_img = flipped_IMG[y:y + h, x:x + w]
    if roi_img.size == 0:
        # An empty crop would otherwise fail inside cv2.resize with an
        # opaque assertion error.
        raise ValueError(
            f"ROI crop is empty: roi=({x}, {y}, {w}, {h}), "
            f"image shape={flipped_IMG.shape[:2]}"
        )

    # 4) Preprocess for the RKNN input.
    input_tensor = preprocess_image_for_rknn(
        roi_img,
        size=IMG_SIZE,
        resize_mode=RESIZE_MODE,
        to_rgb=TO_RGB,
        normalize=NORMALIZE,
        layout=LAYOUT,
    )
    # Single conversion: only copies when dtype or layout actually differ.
    input_tensor = np.ascontiguousarray(input_tensor, dtype=np.float32)

    # 5) Inference.
    outputs = rknn_lite.inference([input_tensor])
    if outputs is None or len(outputs) == 0:
        raise RuntimeError("RKNN inference returned no output")
    pred = outputs[0].reshape(-1).astype(float)
    class_id = int(np.argmax(pred))

    # Resolve the label to draw. cv2.putText with a Hershey font cannot
    # render non-ASCII text (it draws '?' instead), so the Chinese labels
    # are mapped to ASCII equivalents for the overlay.
    pile_labels_ascii = {"大堆料": "large pile", "小堆料": "small pile"}
    if class_id in (1, 2):
        final_class, _, _, _ = weighted_small_large(pred)
        label_text = pile_labels_ascii.get(final_class, str(class_id))
    elif class_id in CLASS_NAMES:
        label_text = CLASS_NAMES[class_id]
    else:
        label_text = f"unknown({class_id})"

    # 6) Visualisation, drawn on a copy of the flipped image.
    vis_img = flipped_IMG.copy()
    cv2.rectangle(vis_img, (x, y), (x + w, y + h), (255, 0, 0), 2)
    # Put the text above the box when there is room, otherwise below it.
    ty = y - 10 if y >= 20 else y + h + 20
    cv2.putText(
        vis_img,
        f"Class: {label_text}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 255, 0),
        2,
    )
    return vis_img
import cv2
import numpy as np
# ---------------------------
# Frozen configuration
# ---------------------------
# Class index -> label; every label is simply the stringified index.
CLASS_NAMES = {index: str(index) for index in range(5)}

# FIXED_ROI is (x, y, w, h) in the coordinates of the original-resolution
# image (after flipping), e.g. a 1920x1080 frame.
FIXED_ROI = (445, 540, 931, 319)

# Original resolution (the baseline the ROI annotation refers to).
ORIG_W, ORIG_H = 1920, 1080

# Current input image size (stated to be fixed at 640x640).
CUR_W = CUR_H = 640

# Preprocessing settings for the model input.
IMG_SIZE = 640
RESIZE_MODE = "stretch"
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"

# Threshold and per-class weights for the class-1 / class-2 weighted score.
WEIGHT_THRESHOLD = 0.4
W1 = 0.3
W2 = 0.7
# ---------------------------
# 基本工具函数
# ---------------------------
def resize_stretch(image, size=IMG_SIZE):
    """Resize ``image`` to a ``size`` x ``size`` square, ignoring aspect ratio.

    Args:
        image: Image array accepted by ``cv2.resize``.
        size: Edge length, in pixels, of the square output.

    Returns:
        The resized image returned by ``cv2.resize``.
    """
    target_dims = (size, size)
    return cv2.resize(image, target_dims)
def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """Turn an image into a batched float32 tensor for RKNN inference.

    Args:
        img: Source image; converted with ``COLOR_BGR2RGB`` when ``to_rgb``
            is truthy, so it is presumably BGR.
        size: Edge length of the square the image is resized to.
        resize_mode: ``"letterbox"`` is not implemented; any other value
            uses the plain stretch resize.
        to_rgb: Apply the BGR -> RGB conversion after resizing when truthy.
        normalize: Divide pixel values by 255.0 when truthy.
        layout: ``"NHWC"`` keeps the axis order; any other value applies
            ``transpose((2, 0, 1))`` before batching.

    Returns:
        A float32 array with a leading batch axis of length 1.

    Raises:
        NotImplementedError: If ``resize_mode`` is ``"letterbox"``.
    """
    if resize_mode == "letterbox":
        raise NotImplementedError

    resized = resize_stretch(img, size=size)
    if to_rgb:
        resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    pixels = resized.astype(np.float32)
    if normalize:
        pixels /= 255.0

    if layout != "NHWC":
        pixels = np.transpose(pixels, (2, 0, 1))
    batched = np.expand_dims(pixels, axis=0)
    return batched.astype(np.float32)
def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """Choose between the two pile labels from the scores at index 1 and 2.

    The score is the weighted mean ``(w1*p1 + w2*p2) / (p1 + p2)``; it is
    0.0 when ``p1 + p2`` is not positive.

    Args:
        pred: Indexable score vector; entries 1 and 2 are read.
        threshold: Minimum score for the "大堆料" label.
        w1: Weight applied to ``pred[1]``.
        w2: Weight applied to ``pred[2]``.

    Returns:
        Tuple ``(label, score, p1, p2)`` where ``label`` is "大堆料" when
        ``score >= threshold`` and "小堆料" otherwise.
    """
    p1 = float(pred[1])
    p2 = float(pred[2])
    total = p1 + p2
    if total > 0:
        score = (w1 * p1 + w2 * p2) / total
    else:
        score = 0.0
    label = "大堆料" if score >= threshold else "小堆料"
    return label, score, p1, p2
# ---------------------------
# ROI 缩放函数(核心)
# ---------------------------
def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """Scale an ROI given at the original resolution to the size of ``img``.

    Each axis is scaled by the ratio between the current image size and
    the original size and truncated to an integer. The result is then
    clipped to the image so that slicing with it cannot wrap around (a
    negative start index) or yield an empty crop.

    Args:
        img: Array whose first two ``shape`` entries are (height, width).
        orig_roi: ``(x, y, w, h)`` in original-resolution pixels.
        orig_w: Width of the original resolution.
        orig_h: Height of the original resolution.

    Returns:
        ``(x, y, w, h)`` as ints in the coordinates of ``img``. For a
        non-empty image the ROI lies inside the image and is at least
        1x1; an ROI that was already inside is returned unchanged.
    """
    x, y, w, h = orig_roi
    # Size of the current image.
    cur_h, cur_w = img.shape[:2]
    scale_x = cur_w / orig_w
    scale_y = cur_h / orig_h

    sx = int(x * scale_x)
    sy = int(y * scale_y)
    sw = int(w * scale_x)
    sh = int(h * scale_y)

    # Clip the top-left corner into the image, then clip the bottom-right
    # corner while keeping at least one pixel in each direction.
    left = min(max(sx, 0), max(cur_w - 1, 0))
    top = min(max(sy, 0), max(cur_h - 1, 0))
    right = min(max(sx + sw, left + 1), cur_w)
    bottom = min(max(sy + sh, top + 1), cur_h)
    return left, top, right - left, bottom - top
# ---------------------------
# 主推理接口
# ---------------------------
def myFunc(rknn_lite, IMG):
    """Classify the fixed ROI of ``IMG`` and return an annotated image.

    The ROI is rescaled from the annotation resolution to the size of
    ``IMG``, the image is flipped vertically, the ROI is cropped from the
    flipped image, preprocessed and passed to ``rknn_lite.inference``.
    The arg-max class (refined by the weighted score for classes 1 and 2)
    is drawn next to the ROI rectangle.

    Args:
        rknn_lite: Object exposing ``inference(list_of_arrays)``; the first
            element of its result is used as the score vector.
        IMG: Input image array (presumably BGR, since preprocessing
            converts BGR -> RGB — confirm with the caller).

    Returns:
        A copy of the vertically flipped image with the ROI rectangle and
        an ASCII class label drawn on it.

    Raises:
        ValueError: If ``IMG`` is ``None``/empty or the ROI crop is empty.
        RuntimeError: If inference returns no output.
    """
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")

    # 1) The ROI was annotated at ORIG_W x ORIG_H, so scale it to IMG first.
    x, y, w, h = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)

    # 2) The ROI refers to the vertically flipped frame (flip code 0).
    flipped_IMG = cv2.flip(IMG, 0)

    # 3) Crop the scaled ROI from the flipped image.
    roi_img = flipped_IMG[y:y + h, x:x + w]
    if roi_img.size == 0:
        # An empty crop would otherwise fail inside cv2.resize with an
        # opaque assertion error.
        raise ValueError(
            f"ROI crop is empty: roi=({x}, {y}, {w}, {h}), "
            f"image shape={flipped_IMG.shape[:2]}"
        )

    # 4) Preprocess for the RKNN input.
    input_tensor = preprocess_image_for_rknn(
        roi_img,
        size=IMG_SIZE,
        resize_mode=RESIZE_MODE,
        to_rgb=TO_RGB,
        normalize=NORMALIZE,
        layout=LAYOUT,
    )
    # Single conversion: only copies when dtype or layout actually differ.
    input_tensor = np.ascontiguousarray(input_tensor, dtype=np.float32)

    # 5) Inference.
    outputs = rknn_lite.inference([input_tensor])
    if outputs is None or len(outputs) == 0:
        raise RuntimeError("RKNN inference returned no output")
    pred = outputs[0].reshape(-1).astype(float)
    class_id = int(np.argmax(pred))

    # Resolve the label to draw. cv2.putText with a Hershey font cannot
    # render non-ASCII text (it draws '?' instead), so the Chinese labels
    # are mapped to ASCII equivalents for the overlay.
    pile_labels_ascii = {"大堆料": "large pile", "小堆料": "small pile"}
    if class_id in (1, 2):
        final_class, _, _, _ = weighted_small_large(pred)
        label_text = pile_labels_ascii.get(final_class, str(class_id))
    elif class_id in CLASS_NAMES:
        label_text = CLASS_NAMES[class_id]
    else:
        label_text = f"unknown({class_id})"

    # 6) Visualisation, drawn on a copy of the flipped image.
    vis_img = flipped_IMG.copy()
    cv2.rectangle(vis_img, (x, y), (x + w, y + h), (255, 0, 0), 2)
    # Put the text above the box when there is room, otherwise below it.
    ty = y - 10 if y >= 20 else y + h + 20
    cv2.putText(
        vis_img,
        f"Class: {label_text}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 255, 0),
        2,
    )
    return vis_img
import cv2
import numpy as np
# ---------------------------
# Frozen configuration
# ---------------------------
# Class index -> label; every label is simply the stringified index.
CLASS_NAMES = {index: str(index) for index in range(5)}

# FIXED_ROI is (x, y, w, h) in the coordinates of the original-resolution
# image (after flipping), e.g. a 1920x1080 frame.
FIXED_ROI = (445, 540, 931, 319)

# Original resolution (the baseline the ROI annotation refers to).
ORIG_W, ORIG_H = 1920, 1080

# Current input image size (stated to be fixed at 640x640).
CUR_W = CUR_H = 640

# Preprocessing settings for the model input.
IMG_SIZE = 640
RESIZE_MODE = "stretch"
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"

# Threshold and per-class weights for the class-1 / class-2 weighted score.
WEIGHT_THRESHOLD = 0.4
W1 = 0.3
W2 = 0.7
# ---------------------------
# 基本工具函数
# ---------------------------
def resize_stretch(image, size=IMG_SIZE):
    """Resize ``image`` to a ``size`` x ``size`` square, ignoring aspect ratio.

    Args:
        image: Image array accepted by ``cv2.resize``.
        size: Edge length, in pixels, of the square output.

    Returns:
        The resized image returned by ``cv2.resize``.
    """
    target_dims = (size, size)
    return cv2.resize(image, target_dims)
def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """Turn an image into a batched float32 tensor for RKNN inference.

    Args:
        img: Source image; converted with ``COLOR_BGR2RGB`` when ``to_rgb``
            is truthy, so it is presumably BGR.
        size: Edge length of the square the image is resized to.
        resize_mode: ``"letterbox"`` is not implemented; any other value
            uses the plain stretch resize.
        to_rgb: Apply the BGR -> RGB conversion after resizing when truthy.
        normalize: Divide pixel values by 255.0 when truthy.
        layout: ``"NHWC"`` keeps the axis order; any other value applies
            ``transpose((2, 0, 1))`` before batching.

    Returns:
        A float32 array with a leading batch axis of length 1.

    Raises:
        NotImplementedError: If ``resize_mode`` is ``"letterbox"``.
    """
    if resize_mode == "letterbox":
        raise NotImplementedError

    resized = resize_stretch(img, size=size)
    if to_rgb:
        resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    pixels = resized.astype(np.float32)
    if normalize:
        pixels /= 255.0

    if layout != "NHWC":
        pixels = np.transpose(pixels, (2, 0, 1))
    batched = np.expand_dims(pixels, axis=0)
    return batched.astype(np.float32)
def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """Choose between the two pile labels from the scores at index 1 and 2.

    The score is the weighted mean ``(w1*p1 + w2*p2) / (p1 + p2)``; it is
    0.0 when ``p1 + p2`` is not positive.

    Args:
        pred: Indexable score vector; entries 1 and 2 are read.
        threshold: Minimum score for the "大堆料" label.
        w1: Weight applied to ``pred[1]``.
        w2: Weight applied to ``pred[2]``.

    Returns:
        Tuple ``(label, score, p1, p2)`` where ``label`` is "大堆料" when
        ``score >= threshold`` and "小堆料" otherwise.
    """
    p1 = float(pred[1])
    p2 = float(pred[2])
    total = p1 + p2
    if total > 0:
        score = (w1 * p1 + w2 * p2) / total
    else:
        score = 0.0
    label = "大堆料" if score >= threshold else "小堆料"
    return label, score, p1, p2
# ---------------------------
# ROI 缩放函数(核心)
# ---------------------------
def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """Scale an ROI given at the original resolution to the size of ``img``.

    Each axis is scaled by the ratio between the current image size and
    the original size and truncated to an integer. The result is then
    clipped to the image so that slicing with it cannot wrap around (a
    negative start index) or yield an empty crop.

    Args:
        img: Array whose first two ``shape`` entries are (height, width).
        orig_roi: ``(x, y, w, h)`` in original-resolution pixels.
        orig_w: Width of the original resolution.
        orig_h: Height of the original resolution.

    Returns:
        ``(x, y, w, h)`` as ints in the coordinates of ``img``. For a
        non-empty image the ROI lies inside the image and is at least
        1x1; an ROI that was already inside is returned unchanged.
    """
    x, y, w, h = orig_roi
    # Size of the current image.
    cur_h, cur_w = img.shape[:2]
    scale_x = cur_w / orig_w
    scale_y = cur_h / orig_h

    sx = int(x * scale_x)
    sy = int(y * scale_y)
    sw = int(w * scale_x)
    sh = int(h * scale_y)

    # Clip the top-left corner into the image, then clip the bottom-right
    # corner while keeping at least one pixel in each direction.
    left = min(max(sx, 0), max(cur_w - 1, 0))
    top = min(max(sy, 0), max(cur_h - 1, 0))
    right = min(max(sx + sw, left + 1), cur_w)
    bottom = min(max(sy + sh, top + 1), cur_h)
    return left, top, right - left, bottom - top
# ---------------------------
# 主推理接口
# ---------------------------
def myFunc(rknn_lite, IMG):
    """Classify the fixed ROI of ``IMG`` and return an annotated image.

    The ROI is rescaled from the annotation resolution to the size of
    ``IMG``, the image is flipped vertically, the ROI is cropped from the
    flipped image, preprocessed and passed to ``rknn_lite.inference``.
    The arg-max class (refined by the weighted score for classes 1 and 2)
    is drawn next to the ROI rectangle.

    Args:
        rknn_lite: Object exposing ``inference(list_of_arrays)``; the first
            element of its result is used as the score vector.
        IMG: Input image array (presumably BGR, since preprocessing
            converts BGR -> RGB — confirm with the caller).

    Returns:
        A copy of the vertically flipped image with the ROI rectangle and
        an ASCII class label drawn on it.

    Raises:
        ValueError: If ``IMG`` is ``None``/empty or the ROI crop is empty.
        RuntimeError: If inference returns no output.
    """
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")

    # 1) The ROI was annotated at ORIG_W x ORIG_H, so scale it to IMG first.
    x, y, w, h = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)

    # 2) The ROI refers to the vertically flipped frame (flip code 0).
    flipped_IMG = cv2.flip(IMG, 0)

    # 3) Crop the scaled ROI from the flipped image.
    roi_img = flipped_IMG[y:y + h, x:x + w]
    if roi_img.size == 0:
        # An empty crop would otherwise fail inside cv2.resize with an
        # opaque assertion error.
        raise ValueError(
            f"ROI crop is empty: roi=({x}, {y}, {w}, {h}), "
            f"image shape={flipped_IMG.shape[:2]}"
        )

    # 4) Preprocess for the RKNN input.
    input_tensor = preprocess_image_for_rknn(
        roi_img,
        size=IMG_SIZE,
        resize_mode=RESIZE_MODE,
        to_rgb=TO_RGB,
        normalize=NORMALIZE,
        layout=LAYOUT,
    )
    # Single conversion: only copies when dtype or layout actually differ.
    input_tensor = np.ascontiguousarray(input_tensor, dtype=np.float32)

    # 5) Inference.
    outputs = rknn_lite.inference([input_tensor])
    if outputs is None or len(outputs) == 0:
        raise RuntimeError("RKNN inference returned no output")
    pred = outputs[0].reshape(-1).astype(float)
    class_id = int(np.argmax(pred))

    # Resolve the label to draw. cv2.putText with a Hershey font cannot
    # render non-ASCII text (it draws '?' instead), so the Chinese labels
    # are mapped to ASCII equivalents for the overlay.
    pile_labels_ascii = {"大堆料": "large pile", "小堆料": "small pile"}
    if class_id in (1, 2):
        final_class, _, _, _ = weighted_small_large(pred)
        label_text = pile_labels_ascii.get(final_class, str(class_id))
    elif class_id in CLASS_NAMES:
        label_text = CLASS_NAMES[class_id]
    else:
        label_text = f"unknown({class_id})"

    # 6) Visualisation, drawn on a copy of the flipped image.
    vis_img = flipped_IMG.copy()
    cv2.rectangle(vis_img, (x, y), (x + w, y + h), (255, 0, 0), 2)
    # Put the text above the box when there is room, otherwise below it.
    ty = y - 10 if y >= 20 else y + h + 20
    cv2.putText(
        vis_img,
        f"Class: {label_text}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 255, 0),
        2,
    )
    return vis_img