# xiantiao_CV/rknn-multi-threaded-nosigmoid/func_cls5.py

import cv2
import numpy as np

# ---------------------------
# Fixed (hard-coded) configuration
# ---------------------------
# Class index -> display name; each name is currently just the index as text.
CLASS_NAMES = {idx: str(idx) for idx in range(5)}

# Reference resolution that the ROI below was annotated against.
ORIG_W, ORIG_H = 1920, 1080

# ROI as (x, y, w, h), expressed in the coordinates of the flipped image at
# the reference resolution above (e.g. 1920x1080).
FIXED_ROI = (445, 540, 931, 319)

# Size the incoming frames are expected to already have (640x640).
CUR_W = CUR_H = 640

# Preprocessing settings for the model input.
IMG_SIZE = 640
RESIZE_MODE = "stretch"
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"

# Threshold and weights for the weighted decision between classes 1 and 2.
WEIGHT_THRESHOLD = 0.4
W1 = 0.3
W2 = 0.7


# ---------------------------
# Basic utility functions
# ---------------------------
def resize_stretch(image, size=IMG_SIZE):
    """Resize ``image`` to a ``size`` x ``size`` square, ignoring aspect ratio."""
    target_dims = (size, size)
    return cv2.resize(image, target_dims)

def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """Turn an image into a batched float32 tensor for the RKNN model.

    Args:
        img: Image array accepted by ``cv2.resize``.
        size: Side length of the square the image is resized to.
        resize_mode: ``"letterbox"`` is not implemented; any other value
            stretches the image via ``resize_stretch``.
        to_rgb: If true, convert with ``cv2.COLOR_BGR2RGB``.
        normalize: If true, divide pixel values by 255.0.
        layout: ``"NHWC"`` keeps channels last; any other value moves the
            channel axis to the front.

    Returns:
        A float32 array with a leading batch axis of length 1.

    Raises:
        NotImplementedError: If ``resize_mode`` is ``"letterbox"``.
    """
    if resize_mode == "letterbox":
        raise NotImplementedError

    resized = resize_stretch(img, size=size)
    if to_rgb:
        resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    pixels = resized.astype(np.float32)
    if normalize:
        pixels /= 255.0

    # Anything other than NHWC is treated as channels-first.
    if layout != "NHWC":
        pixels = np.transpose(pixels, (2, 0, 1))

    batched = pixels[np.newaxis, ...]
    return batched.astype(np.float32)

def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """Pick one of the two pile labels from the class-1 and class-2 scores.

    The score is the weighted mean ``(w1*p1 + w2*p2) / (p1 + p2)`` of
    ``pred[1]`` and ``pred[2]``; it falls back to 0.0 when their sum is not
    positive.

    Returns:
        ``(label, score, p1, p2)`` where ``label`` is "大堆料" when
        ``score >= threshold`` and "小堆料" otherwise.
    """
    p1 = float(pred[1])
    p2 = float(pred[2])
    denom = p1 + p2
    if denom > 0:
        score = (w1 * p1 + w2 * p2) / denom
    else:
        score = 0.0
    label = "大堆料" if score >= threshold else "小堆料"
    return label, score, p1, p2


# ---------------------------
# ROI scaling (core step)
# ---------------------------
def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """Rescale an ROI given at a reference resolution to the size of ``img``.

    Args:
        img: Array whose first two ``shape`` entries are (height, width).
        orig_roi: ``(x, y, w, h)`` at the reference resolution.
        orig_w: Reference width.
        orig_h: Reference height.

    Returns:
        ``(x, y, w, h)`` scaled to ``img``, each value truncated to ``int``.
    """
    roi_x, roi_y, roi_w, roi_h = orig_roi
    img_h, img_w = img.shape[:2]

    # Horizontal and vertical ratios are computed independently.
    ratio_x = img_w / orig_w
    ratio_y = img_h / orig_h

    return (
        int(roi_x * ratio_x),
        int(roi_y * ratio_y),
        int(roi_w * ratio_x),
        int(roi_h * ratio_y),
    )


# ---------------------------
# Main inference entry point
# ---------------------------
def myFunc(rknn_lite, IMG):
    """Classify the fixed ROI of a frame and return an annotated image.

    The frame is flipped with ``cv2.flip(IMG, 0)``; the ROI (annotated at
    ORIG_W x ORIG_H and rescaled to the frame size) is cropped from the
    flipped frame, preprocessed and passed to ``rknn_lite.inference``. The ROI
    rectangle and the resulting label are drawn on a copy of the flipped frame.

    Args:
        rknn_lite: Object exposing ``inference(list_of_inputs)``.
        IMG: Input frame as an image array; the preprocessing treats it as
            BGR when TO_RGB is enabled.

    Returns:
        A copy of the flipped frame with the ROI box and class label drawn.

    Raises:
        ValueError: If ``IMG`` is None or empty, or the scaled ROI selects no
            pixels of the frame.
        RuntimeError: If inference returns no output.
    """
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")

    # 1) The ROI was annotated at ORIG_W x ORIG_H; rescale it to this frame.
    x, y, w, h = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)

    # 2) The ROI coordinates refer to the flipped frame.
    flipped_IMG = cv2.flip(IMG, 0)

    # 3) Crop the scaled ROI from the flipped frame.
    roi_img = flipped_IMG[y:y+h, x:x+w]
    if roi_img.size == 0:
        # Fail here with a clear message instead of deep inside cv2.resize.
        raise ValueError(
            f"empty ROI {(x, y, w, h)} for image of shape {IMG.shape}"
        )

    # 4) Preprocess for RKNN. The helper already returns float32, so only
    #    contiguity has to be ensured (no extra astype copy).
    input_tensor = np.ascontiguousarray(
        preprocess_image_for_rknn(
            roi_img,
            size=IMG_SIZE,
            resize_mode=RESIZE_MODE,
            to_rgb=TO_RGB,
            normalize=NORMALIZE,
            layout=LAYOUT,
        ),
        dtype=np.float32,
    )

    # 5) Inference.
    outputs = rknn_lite.inference([input_tensor])
    if outputs is None or len(outputs) == 0:
        raise RuntimeError("RKNN inference returned no output")
    pred = np.asarray(outputs[0], dtype=float).reshape(-1)
    class_id = int(np.argmax(pred))

    # Classes 1 and 2 are re-decided by the weighted rule; every other class
    # uses its configured name (or an "unknown" placeholder).
    if class_id in (1, 2):
        final_class, _, _, _ = weighted_small_large(pred)
    else:
        final_class = CLASS_NAMES.get(class_id, f"未知({class_id})")

    # 6) Visualisation, drawn on a copy of the flipped frame.
    vis_img = flipped_IMG.copy()
    cv2.rectangle(vis_img, (x, y), (x + w, y + h), (255, 0, 0), 2)

    # Put the label above the box when there is room, otherwise below it.
    ty = y - 10 if y >= 20 else y + h + 20
    # NOTE(review): Hershey fonts cannot render non-ASCII text; labels such as
    # the ones returned by weighted_small_large will show up as '?' marks.
    cv2.putText(
        vis_img,
        f"Class: {final_class}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 255, 0),
        2,
    )

    return vis_img
import cv2
import numpy as np

# ---------------------------
# Fixed (hard-coded) configuration
# ---------------------------
# Class index -> display name; each name is currently just the index as text.
CLASS_NAMES = {idx: str(idx) for idx in range(5)}

# Reference resolution that the ROI below was annotated against.
ORIG_W, ORIG_H = 1920, 1080

# ROI as (x, y, w, h), expressed in the coordinates of the flipped image at
# the reference resolution above (e.g. 1920x1080).
FIXED_ROI = (445, 540, 931, 319)

# Size the incoming frames are expected to already have (640x640).
CUR_W = CUR_H = 640

# Preprocessing settings for the model input.
IMG_SIZE = 640
RESIZE_MODE = "stretch"
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"

# Threshold and weights for the weighted decision between classes 1 and 2.
WEIGHT_THRESHOLD = 0.4
W1 = 0.3
W2 = 0.7


# ---------------------------
# Basic utility functions
# ---------------------------
def resize_stretch(image, size=IMG_SIZE):
    """Resize ``image`` to a ``size`` x ``size`` square, ignoring aspect ratio."""
    target_dims = (size, size)
    return cv2.resize(image, target_dims)

def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """Turn an image into a batched float32 tensor for the RKNN model.

    Args:
        img: Image array accepted by ``cv2.resize``.
        size: Side length of the square the image is resized to.
        resize_mode: ``"letterbox"`` is not implemented; any other value
            stretches the image via ``resize_stretch``.
        to_rgb: If true, convert with ``cv2.COLOR_BGR2RGB``.
        normalize: If true, divide pixel values by 255.0.
        layout: ``"NHWC"`` keeps channels last; any other value moves the
            channel axis to the front.

    Returns:
        A float32 array with a leading batch axis of length 1.

    Raises:
        NotImplementedError: If ``resize_mode`` is ``"letterbox"``.
    """
    if resize_mode == "letterbox":
        raise NotImplementedError

    resized = resize_stretch(img, size=size)
    if to_rgb:
        resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    pixels = resized.astype(np.float32)
    if normalize:
        pixels /= 255.0

    # Anything other than NHWC is treated as channels-first.
    if layout != "NHWC":
        pixels = np.transpose(pixels, (2, 0, 1))

    batched = pixels[np.newaxis, ...]
    return batched.astype(np.float32)

def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """Pick one of the two pile labels from the class-1 and class-2 scores.

    The score is the weighted mean ``(w1*p1 + w2*p2) / (p1 + p2)`` of
    ``pred[1]`` and ``pred[2]``; it falls back to 0.0 when their sum is not
    positive.

    Returns:
        ``(label, score, p1, p2)`` where ``label`` is "大堆料" when
        ``score >= threshold`` and "小堆料" otherwise.
    """
    p1 = float(pred[1])
    p2 = float(pred[2])
    denom = p1 + p2
    if denom > 0:
        score = (w1 * p1 + w2 * p2) / denom
    else:
        score = 0.0
    label = "大堆料" if score >= threshold else "小堆料"
    return label, score, p1, p2


# ---------------------------
# ROI scaling (core step)
# ---------------------------
def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """Rescale an ROI given at a reference resolution to the size of ``img``.

    Args:
        img: Array whose first two ``shape`` entries are (height, width).
        orig_roi: ``(x, y, w, h)`` at the reference resolution.
        orig_w: Reference width.
        orig_h: Reference height.

    Returns:
        ``(x, y, w, h)`` scaled to ``img``, each value truncated to ``int``.
    """
    roi_x, roi_y, roi_w, roi_h = orig_roi
    img_h, img_w = img.shape[:2]

    # Horizontal and vertical ratios are computed independently.
    ratio_x = img_w / orig_w
    ratio_y = img_h / orig_h

    return (
        int(roi_x * ratio_x),
        int(roi_y * ratio_y),
        int(roi_w * ratio_x),
        int(roi_h * ratio_y),
    )


# ---------------------------
# Main inference entry point
# ---------------------------
def myFunc(rknn_lite, IMG):
    """Classify the fixed ROI of a frame and return an annotated image.

    The frame is flipped with ``cv2.flip(IMG, 0)``; the ROI (annotated at
    ORIG_W x ORIG_H and rescaled to the frame size) is cropped from the
    flipped frame, preprocessed and passed to ``rknn_lite.inference``. The ROI
    rectangle and the resulting label are drawn on a copy of the flipped frame.

    Args:
        rknn_lite: Object exposing ``inference(list_of_inputs)``.
        IMG: Input frame as an image array; the preprocessing treats it as
            BGR when TO_RGB is enabled.

    Returns:
        A copy of the flipped frame with the ROI box and class label drawn.

    Raises:
        ValueError: If ``IMG`` is None or empty, or the scaled ROI selects no
            pixels of the frame.
        RuntimeError: If inference returns no output.
    """
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")

    # 1) The ROI was annotated at ORIG_W x ORIG_H; rescale it to this frame.
    x, y, w, h = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)

    # 2) The ROI coordinates refer to the flipped frame.
    flipped_IMG = cv2.flip(IMG, 0)

    # 3) Crop the scaled ROI from the flipped frame.
    roi_img = flipped_IMG[y:y+h, x:x+w]
    if roi_img.size == 0:
        # Fail here with a clear message instead of deep inside cv2.resize.
        raise ValueError(
            f"empty ROI {(x, y, w, h)} for image of shape {IMG.shape}"
        )

    # 4) Preprocess for RKNN. The helper already returns float32, so only
    #    contiguity has to be ensured (no extra astype copy).
    input_tensor = np.ascontiguousarray(
        preprocess_image_for_rknn(
            roi_img,
            size=IMG_SIZE,
            resize_mode=RESIZE_MODE,
            to_rgb=TO_RGB,
            normalize=NORMALIZE,
            layout=LAYOUT,
        ),
        dtype=np.float32,
    )

    # 5) Inference.
    outputs = rknn_lite.inference([input_tensor])
    if outputs is None or len(outputs) == 0:
        raise RuntimeError("RKNN inference returned no output")
    pred = np.asarray(outputs[0], dtype=float).reshape(-1)
    class_id = int(np.argmax(pred))

    # Classes 1 and 2 are re-decided by the weighted rule; every other class
    # uses its configured name (or an "unknown" placeholder).
    if class_id in (1, 2):
        final_class, _, _, _ = weighted_small_large(pred)
    else:
        final_class = CLASS_NAMES.get(class_id, f"未知({class_id})")

    # 6) Visualisation, drawn on a copy of the flipped frame.
    vis_img = flipped_IMG.copy()
    cv2.rectangle(vis_img, (x, y), (x + w, y + h), (255, 0, 0), 2)

    # Put the label above the box when there is room, otherwise below it.
    ty = y - 10 if y >= 20 else y + h + 20
    # NOTE(review): Hershey fonts cannot render non-ASCII text; labels such as
    # the ones returned by weighted_small_large will show up as '?' marks.
    cv2.putText(
        vis_img,
        f"Class: {final_class}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 255, 0),
        2,
    )

    return vis_img
import cv2
import numpy as np

# ---------------------------
# Fixed (hard-coded) configuration
# ---------------------------
# Class index -> display name; each name is currently just the index as text.
CLASS_NAMES = {idx: str(idx) for idx in range(5)}

# Reference resolution that the ROI below was annotated against.
ORIG_W, ORIG_H = 1920, 1080

# ROI as (x, y, w, h), expressed in the coordinates of the flipped image at
# the reference resolution above (e.g. 1920x1080).
FIXED_ROI = (445, 540, 931, 319)

# Size the incoming frames are expected to already have (640x640).
CUR_W = CUR_H = 640

# Preprocessing settings for the model input.
IMG_SIZE = 640
RESIZE_MODE = "stretch"
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"

# Threshold and weights for the weighted decision between classes 1 and 2.
WEIGHT_THRESHOLD = 0.4
W1 = 0.3
W2 = 0.7


# ---------------------------
# Basic utility functions
# ---------------------------
def resize_stretch(image, size=IMG_SIZE):
    """Resize ``image`` to a ``size`` x ``size`` square, ignoring aspect ratio."""
    target_dims = (size, size)
    return cv2.resize(image, target_dims)

def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """Turn an image into a batched float32 tensor for the RKNN model.

    Args:
        img: Image array accepted by ``cv2.resize``.
        size: Side length of the square the image is resized to.
        resize_mode: ``"letterbox"`` is not implemented; any other value
            stretches the image via ``resize_stretch``.
        to_rgb: If true, convert with ``cv2.COLOR_BGR2RGB``.
        normalize: If true, divide pixel values by 255.0.
        layout: ``"NHWC"`` keeps channels last; any other value moves the
            channel axis to the front.

    Returns:
        A float32 array with a leading batch axis of length 1.

    Raises:
        NotImplementedError: If ``resize_mode`` is ``"letterbox"``.
    """
    if resize_mode == "letterbox":
        raise NotImplementedError

    resized = resize_stretch(img, size=size)
    if to_rgb:
        resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    pixels = resized.astype(np.float32)
    if normalize:
        pixels /= 255.0

    # Anything other than NHWC is treated as channels-first.
    if layout != "NHWC":
        pixels = np.transpose(pixels, (2, 0, 1))

    batched = pixels[np.newaxis, ...]
    return batched.astype(np.float32)

def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """Pick one of the two pile labels from the class-1 and class-2 scores.

    The score is the weighted mean ``(w1*p1 + w2*p2) / (p1 + p2)`` of
    ``pred[1]`` and ``pred[2]``; it falls back to 0.0 when their sum is not
    positive.

    Returns:
        ``(label, score, p1, p2)`` where ``label`` is "大堆料" when
        ``score >= threshold`` and "小堆料" otherwise.
    """
    p1 = float(pred[1])
    p2 = float(pred[2])
    denom = p1 + p2
    if denom > 0:
        score = (w1 * p1 + w2 * p2) / denom
    else:
        score = 0.0
    label = "大堆料" if score >= threshold else "小堆料"
    return label, score, p1, p2


# ---------------------------
# ROI scaling (core step)
# ---------------------------
def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """Rescale an ROI given at a reference resolution to the size of ``img``.

    Args:
        img: Array whose first two ``shape`` entries are (height, width).
        orig_roi: ``(x, y, w, h)`` at the reference resolution.
        orig_w: Reference width.
        orig_h: Reference height.

    Returns:
        ``(x, y, w, h)`` scaled to ``img``, each value truncated to ``int``.
    """
    roi_x, roi_y, roi_w, roi_h = orig_roi
    img_h, img_w = img.shape[:2]

    # Horizontal and vertical ratios are computed independently.
    ratio_x = img_w / orig_w
    ratio_y = img_h / orig_h

    return (
        int(roi_x * ratio_x),
        int(roi_y * ratio_y),
        int(roi_w * ratio_x),
        int(roi_h * ratio_y),
    )


# ---------------------------
# Main inference entry point
# ---------------------------
def myFunc(rknn_lite, IMG):
    """Classify the fixed ROI of a frame and return an annotated image.

    The frame is flipped with ``cv2.flip(IMG, 0)``; the ROI (annotated at
    ORIG_W x ORIG_H and rescaled to the frame size) is cropped from the
    flipped frame, preprocessed and passed to ``rknn_lite.inference``. The ROI
    rectangle and the resulting label are drawn on a copy of the flipped frame.

    Args:
        rknn_lite: Object exposing ``inference(list_of_inputs)``.
        IMG: Input frame as an image array; the preprocessing treats it as
            BGR when TO_RGB is enabled.

    Returns:
        A copy of the flipped frame with the ROI box and class label drawn.

    Raises:
        ValueError: If ``IMG`` is None or empty, or the scaled ROI selects no
            pixels of the frame.
        RuntimeError: If inference returns no output.
    """
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")

    # 1) The ROI was annotated at ORIG_W x ORIG_H; rescale it to this frame.
    x, y, w, h = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)

    # 2) The ROI coordinates refer to the flipped frame.
    flipped_IMG = cv2.flip(IMG, 0)

    # 3) Crop the scaled ROI from the flipped frame.
    roi_img = flipped_IMG[y:y+h, x:x+w]
    if roi_img.size == 0:
        # Fail here with a clear message instead of deep inside cv2.resize.
        raise ValueError(
            f"empty ROI {(x, y, w, h)} for image of shape {IMG.shape}"
        )

    # 4) Preprocess for RKNN. The helper already returns float32, so only
    #    contiguity has to be ensured (no extra astype copy).
    input_tensor = np.ascontiguousarray(
        preprocess_image_for_rknn(
            roi_img,
            size=IMG_SIZE,
            resize_mode=RESIZE_MODE,
            to_rgb=TO_RGB,
            normalize=NORMALIZE,
            layout=LAYOUT,
        ),
        dtype=np.float32,
    )

    # 5) Inference.
    outputs = rknn_lite.inference([input_tensor])
    if outputs is None or len(outputs) == 0:
        raise RuntimeError("RKNN inference returned no output")
    pred = np.asarray(outputs[0], dtype=float).reshape(-1)
    class_id = int(np.argmax(pred))

    # Classes 1 and 2 are re-decided by the weighted rule; every other class
    # uses its configured name (or an "unknown" placeholder).
    if class_id in (1, 2):
        final_class, _, _, _ = weighted_small_large(pred)
    else:
        final_class = CLASS_NAMES.get(class_id, f"未知({class_id})")

    # 6) Visualisation, drawn on a copy of the flipped frame.
    vis_img = flipped_IMG.copy()
    cv2.rectangle(vis_img, (x, y), (x + w, y + h), (255, 0, 0), 2)

    # Put the label above the box when there is room, otherwise below it.
    ty = y - 10 if y >= 20 else y + h + 20
    # NOTE(review): Hershey fonts cannot render non-ASCII text; labels such as
    # the ones returned by weighted_small_large will show up as '?' marks.
    cv2.putText(
        vis_img,
        f"Class: {final_class}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 255, 0),
        2,
    )

    return vis_img