diff --git a/zhuangtai_class_cls_1980x1080/.idea/.gitignore b/.idea/.gitignore similarity index 100% rename from zhuangtai_class_cls_1980x1080/.idea/.gitignore rename to .idea/.gitignore diff --git a/zhuangtai_class_cls_1980x1080/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml similarity index 100% rename from zhuangtai_class_cls_1980x1080/.idea/inspectionProfiles/profiles_settings.xml rename to .idea/inspectionProfiles/profiles_settings.xml diff --git a/zhuangtai_class_cls_1980x1080/.idea/misc.xml b/.idea/misc.xml similarity index 100% rename from zhuangtai_class_cls_1980x1080/.idea/misc.xml rename to .idea/misc.xml diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..d998112 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..8306744 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/zhuangtai_class_cls_1980x1080/.idea/zhuangtai_class_cls.iml b/.idea/zjsh_code_jicheng.iml similarity index 100% rename from zhuangtai_class_cls_1980x1080/.idea/zhuangtai_class_cls.iml rename to .idea/zjsh_code_jicheng.iml diff --git a/LED_send/led_send.py b/LED_send/led_send.py index 90674b1..e28ec17 100644 --- a/LED_send/led_send.py +++ b/LED_send/led_send.py @@ -65,6 +65,8 @@ if lib is None: # ====================== 生成 LED 表格 ====================== def generate_led_table(data, output_path="led_send.png", font_path="msyh.ttc"): + from PIL import Image, ImageDraw, ImageFont + try: font_title = ImageFont.truetype(font_path, 24) font_data = ImageFont.truetype(font_path, 20) @@ -76,7 +78,7 @@ def generate_led_table(data, output_path="led_send.png", font_path="msyh.ttc"): font_title = font_data = font_data_big = font_small = ImageFont.load_default() header_font = ImageFont.load_default() - total_width, total_height = 640, 448 + 
total_width, total_height = 630, 430 img = Image.new("RGB", (total_width, total_height), (0, 0, 0)) draw = ImageDraw.Draw(img) @@ -84,74 +86,95 @@ def generate_led_table(data, output_path="led_send.png", font_path="msyh.ttc"): row_count = 8 row_heights = [int(total_height * 0.095)] * 6 + [int(total_height * 0.15), int(total_height * 0.15)] y_positions = [0] - for h in row_heights[:-1]: + for h in row_heights: y_positions.append(y_positions[-1] + h) col_width = total_width // col_count + # 表头 header_text = "浇筑工序信息屏测试" bbox = draw.textbbox((0, 0), header_text, font=header_font) tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1] draw.text(((total_width - tw) // 2, 7), header_text, fill="Yellow", font=header_font) - # safe float parse + # safe float try: - task_quantity = float(data.get("TotMete", 0)) + task_quantity = float(data.get("TotMete", 0.0)) + fixed_value = float(data.get("BetonVolumeAlready", 0.0)) except Exception: task_quantity = 0.0 + fixed_value = 0.0 task_quantity_str = f"{task_quantity}" + fixed_value_str = f"/{fixed_value}" table_data = [ ["本盘方量", "当前模具", "高斗称值", "低斗称值"], - [str(data.get("PlateVolume", "")), str(data.get("MouldCode", "")), str(data.get("HighBucketWeighingValue", "")), str(data.get("LowBucketWeighingValue", ""))], + [str(data.get("PlateVolume", "")), str(data.get("MouldCode", "")), + str(data.get("UpperWeight", "")), str(data.get("LowerWeight", ""))], ["投料时间", "当前管片", "砼出料温度", "振捣频率"], - [str(data.get("ProduceStartTime", "")), str(data.get("ArtifactID", "")), str(data.get("Temper", "")), str(data.get("VibrationFrequency", ""))], + [str(data.get("ProduceStartTime", "")), str(data.get("ArtifactID", "")), + str(data.get("Temper", "")), str(data.get("VibrationFrequency", ""))], ["累计盘次", "隐蔽验收", "车间环温", "任务方量"], - [str(data.get("PlateIDSerial", "任务方量")), str(data.get("CheckResult", "")), str(data.get("WorkshopTemperature", "")), ""], + [str(data.get("PlateIDSerial", "")), str(data.get("CheckResult", "")), + str(data.get("WorkshopTemperature", 
"")), ""], ["配方比例", "", "", ""], ["拆模强度", "", "", ""] ] - # 画表格框 - for r in range(row_count): - y1 = y_positions[r] + 40 - h = row_heights[r] - for c in range(col_count): - x1 = c * col_width - if r >= 6 and c == 1: - draw.rectangle([x1, y1, total_width - 1, y1 + h - 1], outline="white", width=1) - break - elif r >= 6 and c > 1: - continue - else: - draw.rectangle([x1, y1, x1 + col_width - 1, y1 + h - 1], outline="white", width=1) + # ======================= + # 画表格线(只用 line) + # ======================= + line_color = (255, 255, 255) + line_width = 1 + # 横线 + for r in range(row_count + 1): + y = y_positions[r] + 40 if r < row_count else y_positions[-1] + 40 + draw.line([(0, y), (total_width, y)], fill=line_color, width=line_width) + + # 竖线 + for c in range(col_count + 1): + x = c * col_width + # 前6行所有竖线 + for r in range(6): + y1 = y_positions[r] + 40 + y2 = y_positions[r + 1] + 40 + draw.line([(x, y1), (x, y2)], fill=line_color, width=line_width) + + # 最后两行 + y1 = y_positions[6] + 40 + y2 = y_positions[8] + 40 + if c == 0 or c == col_count: # 左右边框 + draw.line([(x, y1), (x, y2)], fill=line_color, width=line_width) + elif c == 1: # 第二列竖线(分隔跨列内容) + draw.line([(x, y1), (x, y2)], fill=line_color, width=line_width) + # 第三列和第四列竖线不画,保持跨列显示 + + # ======================= # 绘制文本 + # ======================= for r in range(row_count): y1 = y_positions[r] + 40 h = row_heights[r] for c in range(col_count): x1 = c * col_width content = table_data[r][c] + if not content.strip(): if r == 5 and c == 3: bbox_task = draw.textbbox((0, 0), task_quantity_str, font=font_data) tw_task = bbox_task[2] - bbox_task[0] th_task = bbox_task[3] - bbox_task[1] - # 红色显示任务数量 draw.text((x1 + (col_width - 1.8 * tw_task) // 2, y1 + (h - th_task) // 2), task_quantity_str, fill="red", font=font_data) - # 亮绿色显示固定值 "/214.1" - fixed_text = "/214.1" - bbox_fixed = draw.textbbox((0, 0), fixed_text, font=font_data) + bbox_fixed = draw.textbbox((0, 0), fixed_value_str, font=font_data) tw_fixed = bbox_fixed[2] - 
bbox_fixed[0] - draw.text((x1 + (col_width - tw_fixed) // 2 + 0.78 * tw_task, y1 + (h - th_task) // 2), - fixed_text, fill=(0, 255, 0), font=font_data) + draw.text((x1 + (col_width - tw_fixed) // 2 + 0.78 * tw_task, + y1 + (h - th_task) // 2), + fixed_value_str, fill=(0, 255, 0), font=font_data) continue is_header = r in (0, 2, 4, 6, 7) - # 亮绿色显示表头 color = (0, 255, 0) if is_header else "red" - if color == "red" and r < 3: font = font_data_big elif color == "red" and r >= 6: @@ -164,27 +187,29 @@ def generate_led_table(data, output_path="led_send.png", font_path="msyh.ttc"): th = bbox[3] - bbox[1] draw.text((x1 + (col_width - tw) // 2, y1 + (h - th) // 2), content, fill=color, font=font) - # 多行文本居中函数 + # 多行文本居中 def draw_multiline_text_center(draw_obj, x, y, width, height, text, font_obj, fill="red"): lines = text.split('\n') bboxs = [draw_obj.textbbox((0, 0), line, font=font_obj) for line in lines] total_h = sum(b[3] - b[1] for b in bboxs) - y_start = y + (height - total_h) // 2 - curr_y = y_start + cy = y + (height - total_h) // 2 for line, b in zip(lines, bboxs): w = b[2] - b[0] h = b[3] - b[1] - draw_obj.text((x + (width - w) // 2, curr_y), line, fill=fill, font=font_obj) - curr_y += h + draw_obj.text((x + (width - w) // 2, cy), line, fill=fill, font=font_obj) + cy += h draw_multiline_text_center(draw, col_width * 1, y_positions[6] + 40, col_width * 3, row_heights[6], str(data.get("FormulaProportion", "")).replace("\r", ""), font_small) draw_multiline_text_center(draw, col_width * 1, y_positions[7] + 40, col_width * 3, row_heights[7], - f"{data.get('DayStrengthValue', '')}\n{data.get('NihtStrengthValue', '')}", font_small) + f"{data.get('DayStrengthValue', '')}\n{data.get('NihtStrengthValue', '')}", + font_small) img.save(output_path) print(f"已生成参数化表格:{output_path}") + + # ====================== 动态区结构体 ====================== class EQpageHeader_G6(Structure): _fields_ = [ @@ -208,7 +233,7 @@ def send_dynamic_frame(ip="10.6.242.2", port=5005, frame=None, 
filename="led_sen print("frame 为空!") return - target_w, target_h = 640, 448 + target_w, target_h = 630, 435 resized = cv2.resize(frame, (target_w, target_h)) save_path = os.path.join(CURRENT_DIR, filename) cv2.imwrite(save_path, resized) diff --git a/LED_send/led_send_old.py b/LED_send/led_send_old.py new file mode 100644 index 0000000..90674b1 --- /dev/null +++ b/LED_send/led_send_old.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python3 +# coding: utf-8 +import os +import cv2 +from PIL import Image, ImageDraw, ImageFont +import ctypes +from ctypes import * +import glob +import sys + +# ============================================================ +# SDK Load +# ============================================================ + +CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) +MAIN_SO_NAME = "libbx_sdkDual.so" +MAIN_SO = os.path.join(CURRENT_DIR, MAIN_SO_NAME) + +def preload_shared_objects(so_dir): + print(f"自动加载 so 路径:{so_dir}") + if not os.path.isdir(so_dir): + print(f"错误:目录不存在: {so_dir}") + return None + + so_list = glob.glob(os.path.join(so_dir, "*.so*")) + iconv_files = [s for s in so_list if "libiconv" in os.path.basename(s)] + loaded = set() + + for f in iconv_files: + try: + ctypes.CDLL(f, mode=ctypes.RTLD_GLOBAL) + print(f"已加载 libiconv: {f}") + loaded.add(f) + except Exception as e: + print(f"加载失败 {f}: {e}") + + for f in so_list: + if os.path.basename(f) == MAIN_SO_NAME or f in loaded: + continue + try: + ctypes.CDLL(f, mode=ctypes.RTLD_GLOBAL) + print(f"已加载依赖库: {f}") + except Exception as e: + print(f"跳过无法加载的库 {f}: {e}") + + if os.path.exists(MAIN_SO): + try: + lib = ctypes.CDLL(MAIN_SO, mode=ctypes.RTLD_GLOBAL) + print(f"成功加载主库: {MAIN_SO}") + return lib + except Exception as e: + print(f"主库加载失败: {MAIN_SO} -> {e}") + return None + else: + print(f"主库不存在: {MAIN_SO}") + return None + +os.environ["LD_LIBRARY_PATH"] = CURRENT_DIR + ":" + os.environ.get("LD_LIBRARY_PATH", "") +os.environ["PATH"] = CURRENT_DIR + ":" + os.environ.get("PATH", "") + +lib = 
preload_shared_objects(CURRENT_DIR) +if lib is None: + print("无法加载主库,程序退出") + sys.exit(1) + +# ====================== 生成 LED 表格 ====================== +def generate_led_table(data, output_path="led_send.png", font_path="msyh.ttc"): + try: + font_title = ImageFont.truetype(font_path, 24) + font_data = ImageFont.truetype(font_path, 20) + font_data_big = ImageFont.truetype(font_path, 22) + font_small = ImageFont.truetype(font_path, 16) + header_font = ImageFont.truetype(font_path, 26) + except IOError: + print("字体未找到,使用默认字体") + font_title = font_data = font_data_big = font_small = ImageFont.load_default() + header_font = ImageFont.load_default() + + total_width, total_height = 640, 448 + img = Image.new("RGB", (total_width, total_height), (0, 0, 0)) + draw = ImageDraw.Draw(img) + + col_count = 4 + row_count = 8 + row_heights = [int(total_height * 0.095)] * 6 + [int(total_height * 0.15), int(total_height * 0.15)] + y_positions = [0] + for h in row_heights[:-1]: + y_positions.append(y_positions[-1] + h) + col_width = total_width // col_count + + header_text = "浇筑工序信息屏测试" + bbox = draw.textbbox((0, 0), header_text, font=header_font) + tw, th = bbox[2] - bbox[0], bbox[3] - bbox[1] + draw.text(((total_width - tw) // 2, 7), header_text, fill="Yellow", font=header_font) + + # safe float parse + try: + task_quantity = float(data.get("TotMete", 0)) + except Exception: + task_quantity = 0.0 + task_quantity_str = f"{task_quantity}" + + table_data = [ + ["本盘方量", "当前模具", "高斗称值", "低斗称值"], + [str(data.get("PlateVolume", "")), str(data.get("MouldCode", "")), str(data.get("HighBucketWeighingValue", "")), str(data.get("LowBucketWeighingValue", ""))], + ["投料时间", "当前管片", "砼出料温度", "振捣频率"], + [str(data.get("ProduceStartTime", "")), str(data.get("ArtifactID", "")), str(data.get("Temper", "")), str(data.get("VibrationFrequency", ""))], + ["累计盘次", "隐蔽验收", "车间环温", "任务方量"], + [str(data.get("PlateIDSerial", "任务方量")), str(data.get("CheckResult", "")), str(data.get("WorkshopTemperature", "")), 
""], + ["配方比例", "", "", ""], + ["拆模强度", "", "", ""] + ] + + # 画表格框 + for r in range(row_count): + y1 = y_positions[r] + 40 + h = row_heights[r] + for c in range(col_count): + x1 = c * col_width + if r >= 6 and c == 1: + draw.rectangle([x1, y1, total_width - 1, y1 + h - 1], outline="white", width=1) + break + elif r >= 6 and c > 1: + continue + else: + draw.rectangle([x1, y1, x1 + col_width - 1, y1 + h - 1], outline="white", width=1) + + # 绘制文本 + for r in range(row_count): + y1 = y_positions[r] + 40 + h = row_heights[r] + for c in range(col_count): + x1 = c * col_width + content = table_data[r][c] + if not content.strip(): + if r == 5 and c == 3: + bbox_task = draw.textbbox((0, 0), task_quantity_str, font=font_data) + tw_task = bbox_task[2] - bbox_task[0] + th_task = bbox_task[3] - bbox_task[1] + # 红色显示任务数量 + draw.text((x1 + (col_width - 1.8 * tw_task) // 2, y1 + (h - th_task) // 2), + task_quantity_str, fill="red", font=font_data) + # 亮绿色显示固定值 "/214.1" + fixed_text = "/214.1" + bbox_fixed = draw.textbbox((0, 0), fixed_text, font=font_data) + tw_fixed = bbox_fixed[2] - bbox_fixed[0] + draw.text((x1 + (col_width - tw_fixed) // 2 + 0.78 * tw_task, y1 + (h - th_task) // 2), + fixed_text, fill=(0, 255, 0), font=font_data) + continue + + is_header = r in (0, 2, 4, 6, 7) + # 亮绿色显示表头 + color = (0, 255, 0) if is_header else "red" + + if color == "red" and r < 3: + font = font_data_big + elif color == "red" and r >= 6: + font = font_small + else: + font = font_title if is_header else font_data + + bbox = draw.textbbox((0, 0), content, font=font) + tw = bbox[2] - bbox[0] + th = bbox[3] - bbox[1] + draw.text((x1 + (col_width - tw) // 2, y1 + (h - th) // 2), content, fill=color, font=font) + + # 多行文本居中函数 + def draw_multiline_text_center(draw_obj, x, y, width, height, text, font_obj, fill="red"): + lines = text.split('\n') + bboxs = [draw_obj.textbbox((0, 0), line, font=font_obj) for line in lines] + total_h = sum(b[3] - b[1] for b in bboxs) + y_start = y + (height - total_h) // 
2 + curr_y = y_start + for line, b in zip(lines, bboxs): + w = b[2] - b[0] + h = b[3] - b[1] + draw_obj.text((x + (width - w) // 2, curr_y), line, fill=fill, font=font_obj) + curr_y += h + + draw_multiline_text_center(draw, col_width * 1, y_positions[6] + 40, col_width * 3, row_heights[6], + str(data.get("FormulaProportion", "")).replace("\r", ""), font_small) + draw_multiline_text_center(draw, col_width * 1, y_positions[7] + 40, col_width * 3, row_heights[7], + f"{data.get('DayStrengthValue', '')}\n{data.get('NihtStrengthValue', '')}", font_small) + + img.save(output_path) + print(f"已生成参数化表格:{output_path}") + +# ====================== 动态区结构体 ====================== +class EQpageHeader_G6(Structure): + _fields_ = [ + ("PageStyle", c_uint8), ("DisplayMode", c_uint8), ("ClearMode", c_uint8), + ("Speed", c_uint8), ("StayTime", c_uint16), ("RepeatTime", c_uint8), + ("ValidLen", c_uint8), ("CartoonFrameRate", c_uint8), ("BackNotValidFlag", c_uint8), + ("arrMode", c_uint8), ("fontSize", c_uint8), ("color", c_uint8), + ("fontBold", c_uint8), ("fontItalic", c_uint8), ("tdirection", c_uint8), + ("txtSpace", c_uint8), ("Valign", c_uint8), ("Halign", c_uint8) + ] + +lib.bxDual_dynamicArea_AddAreaPic_6G.argtypes = [ + c_char_p, c_uint32, c_uint8, c_uint8, c_uint16, c_uint16, + c_uint16, c_uint16, POINTER(EQpageHeader_G6), c_char_p +] +lib.bxDual_dynamicArea_AddAreaPic_6G.restype = c_int + +# ====================== 发送动态区帧(丝滑覆盖) ====================== +def send_dynamic_frame(ip="10.6.242.2", port=5005, frame=None, filename="led_send.png"): + if frame is None: + print("frame 为空!") + return + + target_w, target_h = 640, 448 + resized = cv2.resize(frame, (target_w, target_h)) + save_path = os.path.join(CURRENT_DIR, filename) + cv2.imwrite(save_path, resized) + + page = EQpageHeader_G6() + page.PageStyle = 0 + page.DisplayMode = 2 + page.ClearMode = 1 + page.Speed = 10 + page.StayTime = 1000 + page.RepeatTime = 1 + page.ValidLen = 64 + page.CartoonFrameRate = 0 + 
page.BackNotValidFlag = 0 + page.arrMode = 1 + page.fontSize = 16 + page.color = 1 + page.fontBold = 0 + page.fontItalic = 0 + page.tdirection = 0 + page.txtSpace = 0 + page.Valign = 2 + page.Halign = 1 + + try: + ret = lib.bxDual_dynamicArea_AddAreaPic_6G( + ip.encode("ascii"), port, 2, 0, 0, 0, target_w, target_h, + byref(page), save_path.encode("gb2312") + ) + if ret == 0: + print("Frame 覆盖成功!") + else: + print("Frame 发送失败,返回码:", ret) + except Exception as e: + print("调用 AddAreaPic 失败:", e) + +def send_led_data(data: dict): + img_path = os.path.join(CURRENT_DIR, "led_send.png") + generate_led_table(data, output_path=img_path) + frame = cv2.imread(img_path) + send_dynamic_frame(frame=frame, filename="led_send.png") + +# ============================================================ +# 主程序示例 +# ============================================================ + +if __name__ == "__main__": + data = { + "PlateVolume": "2.00", + "MouldCode": "SHR2B1-3", + "ProduceStartTime": "15:06", + "ArtifactID": "QR2B13099115D", + "Temper": "18.6℃", + "PlateIDSerial": "85", + "CheckResult": "合格", + "TotMete": "353.2", + "LowBucketWeighingValue": "75", + "HighBucketWeighingValue": "115", + "WorkshopTemperature": "12.4℃", + "VibrationFrequency": "10min/220HZ", + "FormulaProportion": "水泥:砂:石:粉煤灰:矿粉:外加剂:水\r\n0.70:1.56:2.78:0.15:0.15:0.006:0.33", + "DayStrengthValue": "白班:2024/11/27 22:00抗压 龄期:15h 强度25.9", + "NihtStrengthValue": "晚班:2024/11/26 07:55抗压 龄期:12h 强度25.2" + } + + send_led_data(data) + diff --git a/LED_send/msyh.ttc b/LED_send/msyh.ttc new file mode 100644 index 0000000..ea174b2 Binary files /dev/null and b/LED_send/msyh.ttc differ diff --git a/muju_cls/main.py b/muju_cls/main.py new file mode 100644 index 0000000..0ecf790 --- /dev/null +++ b/muju_cls/main.py @@ -0,0 +1,120 @@ +import os +import cv2 +from rknnlite.api import RKNNLite + +# classify_single_image, StableClassJudge, CLASS_NAMES 已在 muju_cls_rknn 中定义 +from muju_cls_rknn import classify_single_image, StableClassJudge, 
CLASS_NAMES + + +def run_stable_classification_loop( + model_path, + roi_file, + image_source, + stable_frames=3, + display_scale=0.5, # 显示缩放比例(0.5 = 显示为原来 50%) + show_window=False # 是否显示窗口 +): + """ + image_source: cv2.VideoCapture 对象 + """ + + judge = StableClassJudge( + stable_frames=stable_frames, + ignore_class=2 # 忽略“有遮挡”类别参与稳定判断 + ) + + cap = image_source + if not hasattr(cap, "read"): + raise TypeError("image_source 必须是 cv2.VideoCapture 实例") + + # 可选:创建可缩放窗口 + if show_window: + cv2.namedWindow("RTSP Stream - Press 'q' to quit", cv2.WINDOW_NORMAL) + + while True: + ret, frame = cap.read() + if not ret: + print("无法读取视频帧(可能是流断开或结束)") + break + + # 上下左右翻转 + frame = cv2.flip(frame, -1) + + # --------------------------- + # 单帧推理 + # --------------------------- + result = classify_single_image(frame, model_path, roi_file) + + class_id = result["class_id"] + class_name = result["class"] + score = result["score"] + + print(f"[FRAME] {class_name} | conf={score:.3f}") + + # --------------------------- + # 稳定判断 + # --------------------------- + stable_class_id = judge.update(class_id) + + if stable_class_id is not None: + print(f"\n稳定输出: {CLASS_NAMES[stable_class_id]}\n") + + # --------------------------- + # 显示画面(缩小窗口) + # --------------------------- + if show_window: + h, w = frame.shape[:2] + display_frame = cv2.resize( + frame, + (int(w * display_scale), int(h * display_scale)), + interpolation=cv2.INTER_AREA + ) + + cv2.imshow("RTSP Stream - Press 'q' to quit", display_frame) + + if cv2.waitKey(1) & 0xFF == ord('q'): + break + + cap.release() + cv2.destroyAllWindows() + + +if __name__ == "__main__": + # --------------------------- + # 配置参数 + # --------------------------- + MODEL_PATH = "muju_cls.rknn" + ROI_FILE = "./roi_coordinates/muju_roi.txt" + RTSP_URL = "rtsp://admin:XJ123456@192.168.250.61:554/streaming/channels/101" + + STABLE_FRAMES = 3 + DISPLAY_SCALE = 0.5 # 显示窗口缩放比例 + SHOW_WINDOW = False # 部署时改成 False,测试的时候打开 + + # --------------------------- + # 打开 
RTSP 视频流 + # --------------------------- + print(f"正在连接 RTSP 流: {RTSP_URL}") + cap = cv2.VideoCapture(RTSP_URL) + + # 降低 RTSP 延迟(部分摄像头支持) + cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) + + if not cap.isOpened(): + print("无法打开 RTSP 流,请检查网络、账号密码或 URL") + exit(1) + + print("RTSP 流连接成功,开始推理...") + + # --------------------------- + # 启动稳定分类循环三帧稳定判断 + # --------------------------- + run_stable_classification_loop( + model_path=MODEL_PATH, + roi_file=ROI_FILE, + image_source=cap, + stable_frames=STABLE_FRAMES, + display_scale=DISPLAY_SCALE, + show_window=SHOW_WINDOW + ) + diff --git a/muju_cls/muju_cls.rknn b/muju_cls/muju_cls.rknn new file mode 100644 index 0000000..ca52394 Binary files /dev/null and b/muju_cls/muju_cls.rknn differ diff --git a/muju_cls/muju_cls100.rknn b/muju_cls/muju_cls100.rknn new file mode 100644 index 0000000..41f7055 Binary files /dev/null and b/muju_cls/muju_cls100.rknn differ diff --git a/muju_cls/muju_cls500.rknn b/muju_cls/muju_cls500.rknn new file mode 100644 index 0000000..5d733df Binary files /dev/null and b/muju_cls/muju_cls500.rknn differ diff --git a/muju_cls/muju_cls_rknn.py b/muju_cls/muju_cls_rknn.py new file mode 100644 index 0000000..d4b4a51 --- /dev/null +++ b/muju_cls/muju_cls_rknn.py @@ -0,0 +1,282 @@ +import os +import cv2 +import numpy as np +from rknnlite.api import RKNNLite + +from collections import deque + +class StableClassJudge: + """ + 连续三帧稳定判决器: + - class0 / class1 连续 3 帧 -> 输出 + - class2 -> 清空计数,重新统计 + """ + + def __init__(self, stable_frames=3, ignore_class=2): + self.stable_frames = stable_frames + self.ignore_class = ignore_class + self.buffer = deque(maxlen=stable_frames) + + def reset(self): + self.buffer.clear() + + def update(self, class_id): + """ + 输入单帧分类结果 + 返回: + - None:尚未稳定 + - class_id:稳定输出结果 + """ + + # 遇到 class2,直接清空重新计数 + if class_id == self.ignore_class: + self.reset() + return None + + self.buffer.append(class_id) + + # 缓冲未满 + if len(self.buffer) < self.stable_frames: + return None + + # 三帧完全一致 + if 
len(set(self.buffer)) == 1: + stable_class = self.buffer[0] + self.reset() # 输出一次后重新计数(防止重复触发) + return stable_class + + return None + +# --------------------------- +# 三分类映射,模具车1是小的,模具车2是大的 +# --------------------------- +CLASS_NAMES = { + 0: "模具车1", + 1: "模具车2", + 2: "有遮挡" +} + +# --------------------------- +# RKNN 全局实例(只加载一次) +# --------------------------- +_global_rknn = None + + +def init_rknn_model(model_path): + global _global_rknn + if _global_rknn is not None: + return _global_rknn + + rknn = RKNNLite(verbose=False) + ret = rknn.load_rknn(model_path) + if ret != 0: + raise RuntimeError(f"Load RKNN failed: {ret}") + + ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0) + if ret != 0: + raise RuntimeError(f"Init runtime failed: {ret}") + + _global_rknn = rknn + print(f"[INFO] RKNN 模型加载成功: {model_path}") + return rknn + + +# --------------------------- +# 预处理 +# --------------------------- +def letterbox(image, new_size=640, color=(114, 114, 114)): + h, w = image.shape[:2] + scale = min(new_size / h, new_size / w) + nh, nw = int(h * scale), int(w * scale) + resized = cv2.resize(image, (nw, nh)) + new_img = np.full((new_size, new_size, 3), color, dtype=np.uint8) + top = (new_size - nh) // 2 + left = (new_size - nw) // 2 + new_img[top:top + nh, left:left + nw] = resized + return new_img + + +def resize_stretch(image, size=640): + return cv2.resize(image, (size, size)) + + +def preprocess_image_for_rknn( + img, + size=640, + resize_mode="stretch", + to_rgb=True, + normalize=False, + layout="NHWC" +): + if resize_mode == "letterbox": + img_box = letterbox(img, new_size=size) + else: + img_box = resize_stretch(img, size=size) + + if to_rgb: + img_box = cv2.cvtColor(img_box, cv2.COLOR_BGR2RGB) + + img_f = img_box.astype(np.float32) + + if normalize: + img_f /= 255.0 + + if layout == "NHWC": + out = np.expand_dims(img_f, axis=0) + else: + out = np.expand_dims(np.transpose(img_f, (2, 0, 1)), axis=0) + + return np.ascontiguousarray(out) + + +# 
--------------------------- +# 单次 RKNN 推理(三分类) +# --------------------------- +def rknn_classify_preprocessed(input_tensor, model_path): + rknn = init_rknn_model(model_path) + + input_tensor = np.ascontiguousarray(input_tensor.astype(np.float32)) + outs = rknn.inference([input_tensor]) + + pred = outs[0].reshape(-1).astype(float) # shape = (3,) + class_id = int(np.argmax(pred)) + + return class_id, pred + +# --------------------------- +# ROI +# --------------------------- +def load_single_roi(txt_path): + if not os.path.exists(txt_path): + raise RuntimeError(f"ROI 文件不存在: {txt_path}") + + with open(txt_path) as f: + for line in f: + s = line.strip() + if not s: + continue + x, y, w, h = map(int, s.split(',')) + return (x, y, w, h) + + raise RuntimeError("ROI 文件为空") + + +def crop_and_return_roi(img, roi): + x, y, w, h = roi + h_img, w_img = img.shape[:2] + + if x < 0 or y < 0 or x + w > w_img or y + h > h_img: + raise RuntimeError(f"ROI 超出图像范围: {roi}") + + return img[y:y + h, x:x + w] + + +# --------------------------- +# 单张图片推理(三分类) +# --------------------------- +def classify_single_image( + frame, + model_path, + roi_file, + size=640, + resize_mode="stretch", + to_rgb=True, + normalize=False, + layout="NHWC" +): + if frame is None: + raise FileNotFoundError("输入帧为空") + + roi = load_single_roi(roi_file) + roi_img = crop_and_return_roi(frame, roi) + + input_tensor = preprocess_image_for_rknn( + roi_img, + size=size, + resize_mode=resize_mode, + to_rgb=to_rgb, + normalize=normalize, + layout=layout + ) + + class_id, probs = rknn_classify_preprocessed(input_tensor, model_path) + class_name = CLASS_NAMES.get(class_id, f"未知类别({class_id})") + + return { + "class_id": class_id, + "class": class_name, + "score": round(float(probs[class_id]), 4), + "raw": probs.tolist() + } + + + +# --------------------------- +# 示例调用 +# --------------------------- +if __name__ == "__main__": + model_path = "muju_cls.rknn" + roi_file = "./roi_coordinates/muju_roi.txt" + image_path = 
"./test_image/test.png" + + frame = cv2.imread(image_path) + if frame is None: + raise FileNotFoundError(f"无法读取图片: {image_path}") + + result = classify_single_image(frame, model_path, roi_file) + print("[RESULT]", result) + +# --------------------------- +# 示例判断逻辑 +''' +import cv2 +from muju_cls_rknn import classify_single_image,StableClassJudge,CLASS_NAMES + +def run_stable_classification_loop( + model_path, + roi_file, + image_source, + stable_frames=3 +): + """ + image_source: + - cv2.VideoCapture + """ + judge = StableClassJudge( + stable_frames=stable_frames, + ignore_class=2 # 有遮挡 + ) + + cap = image_source + if not hasattr(cap, "read"): + raise TypeError("image_source 必须是 cv2.VideoCapture") + + while True: + ret, frame = cap.read() + # 上下左右翻转 + frame = cv2.flip(frame, -1) + + if not ret: + print("读取帧失败,退出") + break + + result = classify_single_image(frame, model_path, roi_file) + + class_id = result["class_id"] + class_name = result["class"] + score = result["score"] + + print(f"[FRAME] {class_name} conf={score}") + + stable = judge.update(class_id) + + if stable is not None: + print(f"\n稳定输出: {CLASS_NAMES[stable]} \n") + + if cv2.waitKey(1) & 0xFF == ord('q'): + break + + cap.release() + cv2.destroyAllWindows() +''' +# --------------------------- diff --git a/muju_cls/roi_coordinates/muju_roi.txt b/muju_cls/roi_coordinates/muju_roi.txt new file mode 100644 index 0000000..17ec328 --- /dev/null +++ b/muju_cls/roi_coordinates/muju_roi.txt @@ -0,0 +1 @@ +2,880,385,200 diff --git a/muju_cls/test.png b/muju_cls/test.png new file mode 100644 index 0000000..84415fc Binary files /dev/null and b/muju_cls/test.png differ diff --git a/muju_cls/test_imagesave.py b/muju_cls/test_imagesave.py new file mode 100644 index 0000000..dd6601a --- /dev/null +++ b/muju_cls/test_imagesave.py @@ -0,0 +1,275 @@ +import os +import cv2 +import time +import numpy as np +from datetime import datetime +from collections import deque +from rknnlite.api import RKNNLite + +# 
===================================================== +# 稳定判决器 +# ===================================================== +class StableClassJudge: + """ + 连续 N 帧稳定判决: + - class0 / class1 连续 N 帧 -> 输出 + - class2 -> 清空计数 + """ + + def __init__(self, stable_frames=3, ignore_class=2): + self.stable_frames = stable_frames + self.ignore_class = ignore_class + self.buffer = deque(maxlen=stable_frames) + + def reset(self): + self.buffer.clear() + + def update(self, class_id): + if class_id == self.ignore_class: + self.reset() + return None + + self.buffer.append(class_id) + + if len(self.buffer) < self.stable_frames: + return None + + if len(set(self.buffer)) == 1: + stable = self.buffer[0] + self.reset() + return stable + + return None + + +# ===================================================== +# 类别定义 +# ===================================================== +CLASS_NAMES = { + 0: "模具车1", + 1: "模具车2", + 2: "有遮挡" +} + + +# ===================================================== +# RKNN 全局实例 +# ===================================================== +_global_rknn = None + + +def init_rknn_model(model_path): + global _global_rknn + if _global_rknn is not None: + return _global_rknn + + rknn = RKNNLite(verbose=False) + + ret = rknn.load_rknn(model_path) + if ret != 0: + raise RuntimeError(f"Load RKNN failed: {ret}") + + ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0) + if ret != 0: + raise RuntimeError(f"Init runtime failed: {ret}") + + _global_rknn = rknn + print(f"[INFO] RKNN 模型加载成功: {model_path}") + return rknn + + +# ===================================================== +# 图像预处理 +# ===================================================== +def letterbox(image, new_size=640, color=(114, 114, 114)): + h, w = image.shape[:2] + scale = min(new_size / h, new_size / w) + nh, nw = int(h * scale), int(w * scale) + + resized = cv2.resize(image, (nw, nh)) + canvas = np.full((new_size, new_size, 3), color, dtype=np.uint8) + + top = (new_size - nh) // 2 + left = (new_size - nw) // 2 + 
canvas[top:top + nh, left:left + nw] = resized + return canvas + + +def resize_stretch(image, size=640): + return cv2.resize(image, (size, size)) + + +def preprocess_image_for_rknn( + img, + size=640, + resize_mode="stretch", + to_rgb=True, + normalize=False, + layout="NHWC" +): + if resize_mode == "letterbox": + img = letterbox(img, size) + else: + img = resize_stretch(img, size) + + if to_rgb: + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + img = img.astype(np.float32) + + if normalize: + img /= 255.0 + + if layout == "NHWC": + img = np.expand_dims(img, axis=0) + else: + img = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0) + + return np.ascontiguousarray(img) + + +# ===================================================== +# RKNN 单次推理 +# ===================================================== +def rknn_classify_preprocessed(input_tensor, model_path): + rknn = init_rknn_model(model_path) + outs = rknn.inference([input_tensor]) + probs = outs[0].reshape(-1).astype(float) + class_id = int(np.argmax(probs)) + return class_id, probs + + +# ===================================================== +# ROI 处理 +# ===================================================== +def load_single_roi(txt_path): + if not os.path.exists(txt_path): + raise RuntimeError(f"ROI 文件不存在: {txt_path}") + + with open(txt_path) as f: + for line in f: + line = line.strip() + if not line: + continue + x, y, w, h = map(int, line.split(",")) + return (x, y, w, h) + + raise RuntimeError("ROI 文件为空") + + +def crop_and_return_roi(img, roi): + x, y, w, h = roi + H, W = img.shape[:2] + + if x < 0 or y < 0 or x + w > W or y + h > H: + raise RuntimeError(f"ROI 超出图像范围: {roi}") + + return img[y:y + h, x:x + w] + + +# ===================================================== +# 单帧分类 +# ===================================================== +def classify_single_image(frame, model_path, roi_file): + roi = load_single_roi(roi_file) + roi_img = crop_and_return_roi(frame, roi) + + input_tensor = preprocess_image_for_rknn( + 
def classify_single_image(frame, model_path, roi_file):
    """Classify one frame: crop the configured ROI, preprocess, run RKNN.

    Returns a dict with the winning class id, its name from CLASS_NAMES,
    the rounded score and the raw probability vector.
    """
    roi = load_single_roi(roi_file)
    roi_img = crop_and_return_roi(frame, roi)

    input_tensor = preprocess_image_for_rknn(
        roi_img,
        size=640,
        resize_mode="stretch",
        to_rgb=True,
        normalize=False,
        layout="NHWC",
    )

    class_id, probs = rknn_classify_preprocessed(input_tensor, model_path)

    return {
        "class_id": class_id,
        "class": CLASS_NAMES[class_id],
        "score": round(float(probs[class_id]), 4),
        "raw": probs.tolist(),
    }


# =====================================================
# RTSP inference + save classified frames
# =====================================================
def run_rtsp_classification_and_save(
    model_path,
    roi_file,
    rtsp_url,
    save_root="clsimg",
    stable_frames=3,
    save_mode="all",  # "all": save every frame; "stable": only debounced results
):
    """Continuously classify frames from an RTSP stream and save them.

    One sub-directory per class is created under *save_root*.  Frames are
    flipped 180 degrees before classification (presumably the camera is
    mounted upside down — TODO confirm).  Press 'q' to stop.

    Raises RuntimeError when the stream cannot be opened.
    """
    # One folder per known class.
    for cid in CLASS_NAMES.keys():
        os.makedirs(os.path.join(save_root, f"class{cid}"), exist_ok=True)

    cap = cv2.VideoCapture(rtsp_url)
    if not cap.isOpened():
        raise RuntimeError(f"无法打开 RTSP: {rtsp_url}")

    # Debounce: a class must persist for `stable_frames` frames (class 2 ignored).
    judge = StableClassJudge(stable_frames=stable_frames, ignore_class=2)

    print("[INFO] RTSP 推理开始")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Transient stream hiccup: back off briefly and retry.
                print("[WARN] RTSP 读帧失败")
                time.sleep(0.1)
                continue

            frame = cv2.flip(frame, -1)  # rotate 180°

            result = classify_single_image(frame, model_path, roi_file)
            class_id = result["class_id"]
            score = result["score"]

            print(f"[FRAME] {result['class']} conf={score}")

            stable = judge.update(class_id)

            save_flag = False
            save_class = class_id

            if save_mode == "all":
                save_flag = True
            elif save_mode == "stable" and stable is not None:
                save_flag = True
                save_class = stable

            if save_flag:
                ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
                filename = f"{ts}_conf{score:.2f}.jpg"
                save_dir = os.path.join(save_root, f"class{save_class}")
                cv2.imwrite(os.path.join(save_dir, filename), frame)
                # FIX: the log previously printed a placeholder instead of the
                # actual saved file name.
                print(f"[SAVE] class{save_class}/{filename}")

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # FIX: release the capture even when classification raises mid-loop.
        cap.release()
        cv2.destroyAllWindows()


# =====================================================
# main
# =====================================================
if __name__ == "__main__":
    model_path = "muju_cls.rknn"
    roi_file = "./roi_coordinates/muju_roi.txt"

    rtsp_url = "rtsp://admin:XJ123456@192.168.250.61:554/streaming/channels/101"

    run_rtsp_classification_and_save(
        model_path=model_path,
        roi_file=roi_file,
        rtsp_url=rtsp_url,
        save_root="clsimg",
        stable_frames=3,
        save_mode="all",  # switch to "stable" to keep only debounced results
    )
b/yemian_seg_diff/debug_mid/roi0_stride32_7.png new file mode 100644 index 0000000..498b24a Binary files /dev/null and b/yemian_seg_diff/debug_mid/roi0_stride32_7.png differ diff --git a/yemian_seg_diff/debug_mid/roi0_stride32_8.png b/yemian_seg_diff/debug_mid/roi0_stride32_8.png new file mode 100644 index 0000000..e4b87e1 Binary files /dev/null and b/yemian_seg_diff/debug_mid/roi0_stride32_8.png differ diff --git a/yemian_seg_diff/debug_mid/roi0_stride8_0.png b/yemian_seg_diff/debug_mid/roi0_stride8_0.png new file mode 100644 index 0000000..f0826f9 Binary files /dev/null and b/yemian_seg_diff/debug_mid/roi0_stride8_0.png differ diff --git a/yemian_seg_diff/debug_mid/roi0_stride8_1.png b/yemian_seg_diff/debug_mid/roi0_stride8_1.png new file mode 100644 index 0000000..08d7ded Binary files /dev/null and b/yemian_seg_diff/debug_mid/roi0_stride8_1.png differ diff --git a/yemian_seg_diff/debug_mid/roi0_stride8_2.png b/yemian_seg_diff/debug_mid/roi0_stride8_2.png new file mode 100644 index 0000000..f07dc59 Binary files /dev/null and b/yemian_seg_diff/debug_mid/roi0_stride8_2.png differ diff --git a/yemian_seg_diff/debug_mid/zhongjianjieguo.py b/yemian_seg_diff/debug_mid/zhongjianjieguo.py new file mode 100644 index 0000000..6380c3c --- /dev/null +++ b/yemian_seg_diff/debug_mid/zhongjianjieguo.py @@ -0,0 +1,291 @@ +import os +import cv2 +import numpy as np +from rknnlite.api import RKNNLite + +# --------------------------- +# 配置 +# --------------------------- +ROIS = [ + (445, 540, 931, 319), +] + +IMG_SIZE = 640 +STRIDES = [8, 16, 32] +OBJ_THRESH = 0.25 +MASK_THRESH = 0.5 + +_global_rknn = None + +# --------------------------- +# RKNN 全局加载 +# --------------------------- +def init_rknn_model(model_path): + global _global_rknn + if _global_rknn is not None: + return _global_rknn + + rknn = RKNNLite(verbose=False) + ret = rknn.load_rknn(model_path) + if ret != 0: + raise RuntimeError(f"Load RKNN failed: {ret}") + ret = rknn.init_runtime() + if ret != 0: + raise 
# ---------------------------
# Utility functions
# ---------------------------
def sigmoid(x):
    """Elementwise logistic function."""
    return 1.0 / (1.0 + np.exp(-x))


def dfl_decode(dfl):
    """Decode a (4, 16) DFL distribution into four expected bin offsets."""
    probs = sigmoid(dfl)
    probs = probs / probs.sum(axis=1, keepdims=True)
    return (probs * np.arange(16)).sum(axis=1)


def largest_intersect_cc(mask_bin, bbox):
    """Keep only the connected component whose bbox overlaps *bbox* most.

    Returns a uint8 mask of the winning component (all zeros when the mask
    has no contours).
    """
    bx1, by1, bx2, by2 = bbox
    contours, _ = cv2.findContours(mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    winner = np.zeros_like(mask_bin, dtype=np.uint8)
    if not contours:
        return winner

    best_area = 0
    for contour in contours:
        filled = np.zeros_like(mask_bin, dtype=np.uint8)
        cv2.drawContours(filled, [contour], -1, 1, -1)
        cx, cy, cw, ch = cv2.boundingRect(contour)
        # Intersection between the component's bbox and the detection bbox.
        overlap_w = max(0, min(cx + cw, bx2) - max(cx, bx1))
        overlap_h = max(0, min(cy + ch, by2) - max(cy, by1))
        area = overlap_w * overlap_h
        if area > best_area:
            best_area = area
            winner = filled
    return winner


# ---------------------------
# RANSAC line fit (rejects outlier points)
# ---------------------------
def fit_line_ransac(pts, max_dist=2.5, min_inliers_ratio=0.6, iters=100):
    """RANSAC fit of x = m*y + b over Nx2 [x, y] points.

    Returns (m, b), or None when there are fewer than 10 points or the
    best model explains less than *min_inliers_ratio* of them.  Uses the
    global numpy RNG, so noisy data can give run-to-run variation.
    """
    if len(pts) < 10:
        return None

    xs = pts[:, 0]
    ys = pts[:, 1]

    best = None
    best_count = 0

    for _ in range(iters):
        i, j = np.random.choice(len(pts), 2, replace=False)
        if abs(ys[j] - ys[i]) < 1e-3:
            continue  # near-duplicate y: slope of x(y) undefined

        m = (xs[j] - xs[i]) / (ys[j] - ys[i])
        b = xs[i] - m * ys[i]

        residual = np.abs(xs - (m * ys + b))
        count = int(np.sum(residual < max_dist))
        if count > best_count:
            best_count = count
            best = (m, b)

    if best is None:
        return None
    if best_count / len(pts) < min_inliers_ratio:
        return None
    return best
def seg_infer(roi):
    """Run the YOLO-seg RKNN model on *roi* and return a uint8 mask (0/255).

    Requires init_rknn_model() to have been called.  Output layout: four
    tensors per stride (reg, cls, obj, mask-coef) and the mask prototype
    tensor at index 12 — assumed from the indexing below, TODO confirm
    against the exported model.
    """
    engine = _global_rknn
    roi_h, roi_w = roi.shape[:2]

    resized = cv2.resize(roi, (IMG_SIZE, IMG_SIZE))
    net_in = resized[..., ::-1][None, ...]  # BGR -> RGB, add batch dim
    outputs = engine.inference([net_in])

    proto = outputs[12][0]
    proto_h, proto_w = proto.shape[1:]

    top_score = -1
    top_coef = None
    top_bbox = None

    head = 0
    for stride in STRIDES:
        reg = outputs[head][0]
        cls = outputs[head + 1][0, 0]
        obj = outputs[head + 2][0, 0]
        coef = outputs[head + 3][0]
        head += 4

        # Best cell of this stride's grid.
        score_map = sigmoid(cls) * sigmoid(obj)
        gy, gx = np.unravel_index(np.argmax(score_map), score_map.shape)
        score = score_map[gy, gx]

        if score > top_score and score > OBJ_THRESH:
            top_score = score
            top_coef = coef[:, gy, gx]

            l, t, r, b = dfl_decode(reg[:, gy, gx].reshape(4, 16))

            cx = (gx + 0.5) * stride
            cy = (gy + 0.5) * stride

            # Project the box from network coordinates onto the proto grid.
            scale = proto_w / IMG_SIZE
            x1 = int((cx - l) * scale)
            y1 = int((cy - t) * scale)
            x2 = int((cx + r) * scale)
            y2 = int((cy + b) * scale)

            top_bbox = (
                max(0, x1), max(0, y1),
                min(proto_w, x2), min(proto_h, y2),
            )

    if top_coef is None:
        return np.zeros((roi_h, roi_w), dtype=np.uint8)

    # Combine mask coefficients with prototypes, threshold, keep the CC
    # that overlaps the detection box, and scale back to ROI resolution.
    proto_mask = sigmoid(np.tensordot(top_coef, proto, axes=1)) > MASK_THRESH
    proto_mask = proto_mask.astype(np.uint8)

    mask_final = largest_intersect_cc(proto_mask, top_bbox)
    mask_roi = cv2.resize(mask_final, (roi_w, roi_h), interpolation=cv2.INTER_NEAREST) * 255
    return mask_roi.astype(np.uint8)


# ---------------------------
# Post-processing (CPU side)
# ---------------------------
def extract_left_right_edge_points(mask_bin):
    """Per row, collect the leftmost/rightmost foreground x (rows with >= 2 px)."""
    left_pts = []
    right_pts = []
    for row, line in enumerate(mask_bin):
        cols = np.flatnonzero(line > 0)
        if cols.size >= 2:
            left_pts.append([cols[0], row])
            right_pts.append([cols[-1], row])
    return np.array(left_pts), np.array(right_pts)
def filter_by_seg_y_ratio(pts, y_start=0.35, y_end=0.85):
    """Keep only points whose y lies in the [y_start, y_end] band of the set.

    Small sets (< 2 points) and short vertical spans (< 10 px) are
    returned untouched.
    """
    if len(pts) < 2:
        return pts
    y_lo = pts[:, 1].min()
    y_hi = pts[:, 1].max()
    span = y_hi - y_lo
    if span < 10:
        return pts
    band_lo = y_lo + int(span * y_start)
    band_hi = y_lo + int(span * y_end)
    keep = (pts[:, 1] >= band_lo) & (pts[:, 1] <= band_hi)
    return pts[keep]


def get_y_ref(mask_bin):
    """Mean bottom-most foreground y over the central 60% of columns.

    Falls back to the vertical midpoint when no column has foreground.
    """
    h, w = mask_bin.shape
    bottoms = []
    for col in range(int(w * 0.2), int(w * 0.8)):
        fg = np.flatnonzero(mask_bin[:, col] > 0)
        if fg.size:
            bottoms.append(fg[-1])
    return int(np.mean(bottoms)) if bottoms else h // 2


# ---------------------------
# Per-image measurement
# ---------------------------
def caculate_yemian_diff(img, return_vis=True):
    """Measure the segmented surface width inside each configured ROI.

    Returns (result_data, vis): result_data is
    (X_left, Y, X_right, Y, diff_px) for the last ROI that produced both
    edge lines, or None; vis is the annotated image (None when
    return_vis is False).  Raises RuntimeError if the model is not loaded.
    """
    if _global_rknn is None:
        raise RuntimeError("请先 init_rknn_model()")

    vis = img.copy() if return_vis else None
    result_data = None

    for rx, ry, rw, rh in ROIS:
        roi = img[ry:ry + rh, rx:rx + rw]
        mask_bin = seg_infer(roi) // 255

        if return_vis:
            overlay = np.zeros_like(roi)
            overlay[mask_bin == 1] = (0, 255, 0)
            vis[ry:ry + rh, rx:rx + rw] = cv2.addWeighted(roi, 0.7, overlay, 0.3, 0)

        # Edge points, restricted to the stable vertical band, then RANSAC.
        left_pts, right_pts = extract_left_right_edge_points(mask_bin)
        left_pts = filter_by_seg_y_ratio(left_pts)
        right_pts = filter_by_seg_y_ratio(right_pts)

        left_line = fit_line_ransac(left_pts)
        right_line = fit_line_ransac(right_pts)
        if left_line is None or right_line is None:
            continue

        lm, lb = left_line
        rm, rb = right_line

        ref_y = get_y_ref(mask_bin)
        x_left = int(lm * ref_y + lb)
        x_right = int(rm * ref_y + rb)

        X_L = rx + x_left
        X_R = rx + x_right
        Y = ry + ref_y
        diff = X_R - X_L
        result_data = (X_L, Y, X_R, Y, diff)

        if return_vis:
            roi_vis = vis[ry:ry + rh, rx:rx + rw]
            cv2.line(roi_vis, (int(lb), 0), (int(lm * rh + lb), rh), (0, 0, 255), 3)
            cv2.line(roi_vis, (int(rb), 0), (int(rm * rh + rb), rh), (255, 0, 0), 3)
            cv2.line(roi_vis, (0, ref_y), (rw, ref_y), (0, 255, 255), 2)
            cv2.circle(roi_vis, (x_left, ref_y), 6, (0, 0, 255), -1)
            cv2.circle(roi_vis, (x_right, ref_y), 6, (255, 0, 0), -1)
            cv2.putText(
                roi_vis, f"diff={diff}px",
                (10, 40),
                cv2.FONT_HERSHEY_SIMPLEX,
                1, (0, 255, 255), 2,
            )

    return result_data, vis
if __name__ == "__main__":
    RKNN_MODEL_PATH = "seg.rknn"
    IMAGE_PATH = "2.png"

    init_rknn_model(RKNN_MODEL_PATH)

    img = cv2.imread(IMAGE_PATH)
    if img is None:
        raise FileNotFoundError(IMAGE_PATH)

    result_data, vis_img = caculate_yemian_diff(img, return_vis=True)

    if result_data:
        XL, YL, XR, YR, diff = result_data
        print(f"左交点: ({XL},{YL}) 右交点: ({XR},{YR}) diff={diff}px")

    if vis_img is not None:
        cv2.imwrite("vis_output.png", vis_img)
        print("可视化结果保存到 vis_output.png")


# =====================================================================
# yemian_seg_diff/main.py — duplicate of debug_mid/zhongjianjieguo.py
# (same pipeline; only the model/image paths in __main__ differ)
# =====================================================================
import os
import cv2
import numpy as np
from rknnlite.api import RKNNLite

# ---------------------------
# Configuration
# ---------------------------
ROIS = [
    (445, 540, 931, 319),
]

IMG_SIZE = 640
STRIDES = [8, 16, 32]
OBJ_THRESH = 0.25
MASK_THRESH = 0.5

_global_rknn = None


# ---------------------------
# RKNN model, loaded once per process
# ---------------------------
def init_rknn_model(model_path):
    """Load the RKNN model once and cache it in a module-level global."""
    global _global_rknn
    if _global_rknn is not None:
        return _global_rknn

    engine = RKNNLite(verbose=False)
    ret = engine.load_rknn(model_path)
    if ret != 0:
        raise RuntimeError(f"Load RKNN failed: {ret}")
    ret = engine.init_runtime()
    if ret != 0:
        raise RuntimeError(f"Init runtime failed: {ret}")

    _global_rknn = engine
    print(f"[INFO] RKNN Seg 模型加载成功: {model_path}")
    return engine


# ---------------------------
# Utility functions
# ---------------------------
def sigmoid(x):
    """Elementwise logistic function."""
    return 1.0 / (1.0 + np.exp(-x))


def dfl_decode(dfl):
    """Decode a (4, 16) DFL distribution into four expected bin offsets."""
    probs = sigmoid(dfl)
    probs = probs / probs.sum(axis=1, keepdims=True)
    return (probs * np.arange(16)).sum(axis=1)


def largest_intersect_cc(mask_bin, bbox):
    """Keep only the connected component whose bbox overlaps *bbox* most."""
    bx1, by1, bx2, by2 = bbox
    contours, _ = cv2.findContours(mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    winner = np.zeros_like(mask_bin, dtype=np.uint8)
    if not contours:
        return winner

    best_area = 0
    for contour in contours:
        filled = np.zeros_like(mask_bin, dtype=np.uint8)
        cv2.drawContours(filled, [contour], -1, 1, -1)
        cx, cy, cw, ch = cv2.boundingRect(contour)
        overlap_w = max(0, min(cx + cw, bx2) - max(cx, bx1))
        overlap_h = max(0, min(cy + ch, by2) - max(cy, by1))
        area = overlap_w * overlap_h
        if area > best_area:
            best_area = area
            winner = filled
    return winner


# ---------------------------
# RANSAC line fit (rejects outlier points)
# ---------------------------
def fit_line_ransac(pts, max_dist=2.5, min_inliers_ratio=0.6, iters=100):
    """RANSAC fit of x = m*y + b over Nx2 [x, y] points.

    Returns (m, b) or None when there are too few points / inliers.
    """
    if len(pts) < 10:
        return None

    xs = pts[:, 0]
    ys = pts[:, 1]

    best = None
    best_count = 0

    for _ in range(iters):
        i, j = np.random.choice(len(pts), 2, replace=False)
        if abs(ys[j] - ys[i]) < 1e-3:
            continue  # near-duplicate y: slope of x(y) undefined

        m = (xs[j] - xs[i]) / (ys[j] - ys[i])
        b = xs[i] - m * ys[i]

        residual = np.abs(xs - (m * ys + b))
        count = int(np.sum(residual < max_dist))
        if count > best_count:
            best_count = count
            best = (m, b)

    if best is None:
        return None
    if best_count / len(pts) < min_inliers_ratio:
        return None
    return best


# ---------------------------
# Seg inference
# ---------------------------
def seg_infer(roi):
    """Run the YOLO-seg RKNN model on *roi* and return a uint8 mask (0/255)."""
    engine = _global_rknn
    roi_h, roi_w = roi.shape[:2]

    resized = cv2.resize(roi, (IMG_SIZE, IMG_SIZE))
    net_in = resized[..., ::-1][None, ...]  # BGR -> RGB, add batch dim
    outputs = engine.inference([net_in])

    proto = outputs[12][0]
    proto_h, proto_w = proto.shape[1:]

    top_score = -1
    top_coef = None
    top_bbox = None

    head = 0
    for stride in STRIDES:
        reg = outputs[head][0]
        cls = outputs[head + 1][0, 0]
        obj = outputs[head + 2][0, 0]
        coef = outputs[head + 3][0]
        head += 4

        score_map = sigmoid(cls) * sigmoid(obj)
        gy, gx = np.unravel_index(np.argmax(score_map), score_map.shape)
        score = score_map[gy, gx]

        if score > top_score and score > OBJ_THRESH:
            top_score = score
            top_coef = coef[:, gy, gx]

            l, t, r, b = dfl_decode(reg[:, gy, gx].reshape(4, 16))

            cx = (gx + 0.5) * stride
            cy = (gy + 0.5) * stride

            # Project the box from network coordinates onto the proto grid.
            scale = proto_w / IMG_SIZE
            x1 = int((cx - l) * scale)
            y1 = int((cy - t) * scale)
            x2 = int((cx + r) * scale)
            y2 = int((cy + b) * scale)

            top_bbox = (
                max(0, x1), max(0, y1),
                min(proto_w, x2), min(proto_h, y2),
            )

    if top_coef is None:
        return np.zeros((roi_h, roi_w), dtype=np.uint8)

    proto_mask = sigmoid(np.tensordot(top_coef, proto, axes=1)) > MASK_THRESH
    proto_mask = proto_mask.astype(np.uint8)

    mask_final = largest_intersect_cc(proto_mask, top_bbox)
    mask_roi = cv2.resize(mask_final, (roi_w, roi_h), interpolation=cv2.INTER_NEAREST) * 255
    return mask_roi.astype(np.uint8)


# ---------------------------
# Post-processing (CPU side)
# ---------------------------
def extract_left_right_edge_points(mask_bin):
    """Per row, collect the leftmost/rightmost foreground x (rows with >= 2 px)."""
    left_pts = []
    right_pts = []
    for row, line in enumerate(mask_bin):
        cols = np.flatnonzero(line > 0)
        if cols.size >= 2:
            left_pts.append([cols[0], row])
            right_pts.append([cols[-1], row])
    return np.array(left_pts), np.array(right_pts)


def filter_by_seg_y_ratio(pts, y_start=0.35, y_end=0.85):
    """Keep only points whose y lies in the [y_start, y_end] band of the set."""
    if len(pts) < 2:
        return pts
    y_lo = pts[:, 1].min()
    y_hi = pts[:, 1].max()
    span = y_hi - y_lo
    if span < 10:
        return pts
    band_lo = y_lo + int(span * y_start)
    band_hi = y_lo + int(span * y_end)
    keep = (pts[:, 1] >= band_lo) & (pts[:, 1] <= band_hi)
    return pts[keep]


def get_y_ref(mask_bin):
    """Mean bottom-most foreground y over the central 60% of columns."""
    h, w = mask_bin.shape
    bottoms = []
    for col in range(int(w * 0.2), int(w * 0.8)):
        fg = np.flatnonzero(mask_bin[:, col] > 0)
        if fg.size:
            bottoms.append(fg[-1])
    return int(np.mean(bottoms)) if bottoms else h // 2


# ---------------------------
# Per-image measurement
# ---------------------------
def caculate_yemian_diff(img, return_vis=True):
    """Measure the segmented surface width inside each configured ROI.

    Returns (result_data, vis); see the debug_mid variant for details.
    """
    if _global_rknn is None:
        raise RuntimeError("请先 init_rknn_model()")

    vis = img.copy() if return_vis else None
    result_data = None

    for rx, ry, rw, rh in ROIS:
        roi = img[ry:ry + rh, rx:rx + rw]
        mask_bin = seg_infer(roi) // 255

        if return_vis:
            overlay = np.zeros_like(roi)
            overlay[mask_bin == 1] = (0, 255, 0)
            vis[ry:ry + rh, rx:rx + rw] = cv2.addWeighted(roi, 0.7, overlay, 0.3, 0)

        left_pts, right_pts = extract_left_right_edge_points(mask_bin)
        left_pts = filter_by_seg_y_ratio(left_pts)
        right_pts = filter_by_seg_y_ratio(right_pts)

        left_line = fit_line_ransac(left_pts)
        right_line = fit_line_ransac(right_pts)
        if left_line is None or right_line is None:
            continue

        lm, lb = left_line
        rm, rb = right_line

        ref_y = get_y_ref(mask_bin)
        x_left = int(lm * ref_y + lb)
        x_right = int(rm * ref_y + rb)

        X_L = rx + x_left
        X_R = rx + x_right
        Y = ry + ref_y
        diff = X_R - X_L
        result_data = (X_L, Y, X_R, Y, diff)

        if return_vis:
            roi_vis = vis[ry:ry + rh, rx:rx + rw]
            cv2.line(roi_vis, (int(lb), 0), (int(lm * rh + lb), rh), (0, 0, 255), 3)
            cv2.line(roi_vis, (int(rb), 0), (int(rm * rh + rb), rh), (255, 0, 0), 3)
            cv2.line(roi_vis, (0, ref_y), (rw, ref_y), (0, 255, 255), 2)
            cv2.circle(roi_vis, (x_left, ref_y), 6, (0, 0, 255), -1)
            cv2.circle(roi_vis, (x_right, ref_y), 6, (255, 0, 0), -1)
            cv2.putText(
                roi_vis, f"diff={diff}px",
                (10, 40),
                cv2.FONT_HERSHEY_SIMPLEX,
                1, (0, 255, 255), 2,
            )

    return result_data, vis


# ---------------------------
# main
# ---------------------------
if __name__ == "__main__":
    RKNN_MODEL_PATH = "seg700.rknn"
    IMAGE_PATH = "7.png"

    init_rknn_model(RKNN_MODEL_PATH)

    img = cv2.imread(IMAGE_PATH)
    if img is None:
        raise FileNotFoundError(IMAGE_PATH)

    result_data, vis_img = caculate_yemian_diff(img, return_vis=True)

    if result_data:
        XL, YL, XR, YR, diff = result_data
        print(f"左交点: ({XL},{YL}) 右交点: ({XR},{YR}) diff={diff}px")

    if vis_img is not None:
        cv2.imwrite("vis_output.png", vis_img)
        print("可视化结果保存到 vis_output.png")
b/yemian_seg_diff/test_image/3 (copy 1).png differ diff --git a/yemian_seg_diff/test_image/3.png b/yemian_seg_diff/test_image/3.png new file mode 100644 index 0000000..1aa6772 Binary files /dev/null and b/yemian_seg_diff/test_image/3.png differ diff --git a/yemian_seg_diff/test_image/33.png b/yemian_seg_diff/test_image/33.png new file mode 100644 index 0000000..1bb3033 Binary files /dev/null and b/yemian_seg_diff/test_image/33.png differ diff --git a/yemian_seg_diff/test_image/4.png b/yemian_seg_diff/test_image/4.png new file mode 100644 index 0000000..d16878e Binary files /dev/null and b/yemian_seg_diff/test_image/4.png differ diff --git a/yemian_seg_diff/test_image/5.png b/yemian_seg_diff/test_image/5.png new file mode 100644 index 0000000..cf3a904 Binary files /dev/null and b/yemian_seg_diff/test_image/5.png differ diff --git a/yemian_seg_diff/test_image/6.png b/yemian_seg_diff/test_image/6.png new file mode 100644 index 0000000..9ace8c2 Binary files /dev/null and b/yemian_seg_diff/test_image/6.png differ diff --git a/yemian_seg_diff/test_image/7.png b/yemian_seg_diff/test_image/7.png new file mode 100644 index 0000000..c23fe86 Binary files /dev/null and b/yemian_seg_diff/test_image/7.png differ diff --git a/yemian_seg_diff_old/61seg.rknn b/yemian_seg_diff_old/61seg.rknn new file mode 100644 index 0000000..8be65ca Binary files /dev/null and b/yemian_seg_diff_old/61seg.rknn differ diff --git a/yemian_seg_diff_old/test_image/1.png b/yemian_seg_diff_old/test_image/1.png new file mode 100644 index 0000000..2a4c35a Binary files /dev/null and b/yemian_seg_diff_old/test_image/1.png differ diff --git a/yemian_seg_diff_old/test_image/2.png b/yemian_seg_diff_old/test_image/2.png new file mode 100644 index 0000000..0288cec Binary files /dev/null and b/yemian_seg_diff_old/test_image/2.png differ diff --git a/yemian_seg_diff_old/test_image/3.png b/yemian_seg_diff_old/test_image/3.png new file mode 100644 index 0000000..1aa6772 Binary files /dev/null and 
# ---------------------------
# Configuration
# ---------------------------
ROIS = [
    (445, 540, 931, 319),
]

IMG_SIZE = 640
STRIDES = [8, 16, 32]
OBJ_THRESH = 0.25
MASK_THRESH = 0.5

_global_rknn = None


# ---------------------------
# RKNN model, loaded once per process
# ---------------------------
def init_rknn_model(model_path):
    """Load the RKNN model once and cache it in a module-level global."""
    global _global_rknn
    if _global_rknn is not None:
        return _global_rknn
    engine = RKNNLite(verbose=False)
    ret = engine.load_rknn(model_path)
    if ret != 0:
        raise RuntimeError(f"Load RKNN failed: {ret}")
    ret = engine.init_runtime()
    if ret != 0:
        raise RuntimeError(f"Init runtime failed: {ret}")
    _global_rknn = engine
    print(f"[INFO] RKNN Seg 模型加载成功: {model_path}")
    return engine


# ---------------------------
# Utility functions
# ---------------------------
def sigmoid(x):
    """Elementwise logistic function."""
    return 1.0 / (1.0 + np.exp(-x))


def dfl_decode(dfl):
    """Decode a (4, 16) DFL distribution into four expected bin offsets."""
    probs = sigmoid(dfl)
    probs = probs / probs.sum(axis=1, keepdims=True)
    return (probs * np.arange(16)).sum(axis=1)


def largest_intersect_cc(mask_bin, bbox):
    """Keep only the connected component whose bbox overlaps *bbox* most."""
    bx1, by1, bx2, by2 = bbox
    contours, _ = cv2.findContours(mask_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    winner = np.zeros_like(mask_bin, dtype=np.uint8)
    if len(contours) == 0:
        return winner
    best_area = 0
    for contour in contours:
        filled = np.zeros_like(mask_bin, dtype=np.uint8)
        cv2.drawContours(filled, [contour], -1, 1, -1)
        cx, cy, cw, ch = cv2.boundingRect(contour)
        overlap_w = max(0, min(cx + cw, bx2) - max(cx, bx1))
        overlap_h = max(0, min(cy + ch, by2) - max(cy, by1))
        area = overlap_w * overlap_h
        if area > best_area:
            best_area = area
            winner = filled
    return winner


# ---------------------------
# Seg inference
# ---------------------------
def seg_infer(roi):
    """Run the YOLO-seg RKNN model on *roi* and return a uint8 mask (0/255).

    Output layout assumed: four tensors per stride (reg, cls, obj,
    mask-coef) plus the prototype tensor at index 12 — TODO confirm
    against the exported model.
    """
    engine = _global_rknn
    roi_h, roi_w = roi.shape[:2]

    resized = cv2.resize(roi, (IMG_SIZE, IMG_SIZE))
    net_in = resized[..., ::-1][None, ...]  # BGR -> RGB, add batch dim
    outputs = engine.inference([net_in])

    proto = outputs[12][0]
    proto_h, proto_w = proto.shape[1:]

    top_score = -1
    top_coef = None
    top_bbox = None

    head = 0
    for stride in STRIDES:
        reg = outputs[head][0]
        cls = outputs[head + 1][0, 0]
        obj = outputs[head + 2][0, 0]
        coef = outputs[head + 3][0]
        head += 4

        score_map = sigmoid(cls) * sigmoid(obj)
        gy, gx = np.unravel_index(np.argmax(score_map), score_map.shape)
        score = score_map[gy, gx]

        if score > top_score and score > OBJ_THRESH:
            top_score = score
            top_coef = coef[:, gy, gx]

            l, t, r, b = dfl_decode(reg[:, gy, gx].reshape(4, 16))
            cx = (gx + 0.5) * stride
            cy = (gy + 0.5) * stride

            # Project the box from network coordinates onto the proto grid.
            scale = proto_w / IMG_SIZE
            x1 = max(0, int((cx - l) * scale))
            y1 = max(0, int((cy - t) * scale))
            x2 = min(proto_w, int((cx + r) * scale))
            y2 = min(proto_h, int((cy + b) * scale))
            top_bbox = (x1, y1, x2, y2)

    if top_coef is None:
        return np.zeros((roi_h, roi_w), dtype=np.uint8)

    proto_mask = sigmoid(np.tensordot(top_coef, proto, axes=1)) > MASK_THRESH
    proto_mask = proto_mask.astype(np.uint8)
    mask_final = largest_intersect_cc(proto_mask, top_bbox)
    mask_roi = cv2.resize(mask_final, (roi_w, roi_h), interpolation=cv2.INTER_NEAREST) * 255
    return mask_roi.astype(np.uint8)
# ---------------------------
# Post-processing (CPU side)
# ---------------------------
def extract_left_right_edge_points(mask_bin):
    """Per row, collect the leftmost/rightmost foreground x (rows with >= 2 px)."""
    left_pts = []
    right_pts = []
    for row, line in enumerate(mask_bin):
        cols = np.flatnonzero(line > 0)
        if cols.size >= 2:
            left_pts.append([cols[0], row])
            right_pts.append([cols[-1], row])
    return np.array(left_pts), np.array(right_pts)


def filter_by_seg_y_ratio(pts, y_start=0.35, y_end=0.85):
    """Keep only points whose y lies in the [y_start, y_end] band of the set."""
    if len(pts) < 2:
        return pts
    y_lo = pts[:, 1].min()
    y_hi = pts[:, 1].max()
    span = y_hi - y_lo
    if span < 10:
        return pts
    band_lo = y_lo + int(span * y_start)
    band_hi = y_lo + int(span * y_end)
    return pts[(pts[:, 1] >= band_lo) & (pts[:, 1] <= band_hi)]


def fit_line(pts):
    """Least-squares fit of x = m*y + b; None when fewer than 2 points."""
    if len(pts) < 2:
        return None
    m, b = np.polyfit(pts[:, 1], pts[:, 0], 1)
    return m, b


def get_y_ref(mask_bin):
    """Mean bottom-most foreground y over the central 60% of columns."""
    h, w = mask_bin.shape
    bottoms = []
    for col in range(int(w * 0.2), int(w * 0.8)):
        fg = np.flatnonzero(mask_bin[:, col] > 0)
        if fg.size:
            bottoms.append(fg[-1])
    return int(np.mean(bottoms)) if bottoms else h // 2


# ---------------------------
# Per-image measurement
# ---------------------------
def caculate_yemian_diff(img, return_vis=True):
    """Measure the segmented surface width inside each configured ROI.

    Returns (result_data, vis) where result_data is
    (X_left, Y, X_right, Y, diff_px) or None, and vis is the annotated
    image (None when return_vis is False).
    """
    if _global_rknn is None:
        raise RuntimeError("请先 init_rknn_model() 加载 RKNN 模型")

    vis = img.copy() if return_vis else None
    result_data = None

    for rx, ry, rw, rh in ROIS:
        roi = img[ry:ry + rh, rx:rx + rw]
        mask_bin = seg_infer(roi) // 255

        if return_vis:
            overlay = np.zeros_like(roi)
            overlay[mask_bin == 1] = (0, 255, 0)
            vis[ry:ry + rh, rx:rx + rw] = cv2.addWeighted(roi, 0.7, overlay, 0.3, 0)

        # Edge points restricted to the stable band, then straight-line fits.
        left_pts, right_pts = extract_left_right_edge_points(mask_bin)
        left_pts = filter_by_seg_y_ratio(left_pts)
        right_pts = filter_by_seg_y_ratio(right_pts)
        left_line = fit_line(left_pts)
        right_line = fit_line(right_pts)
        if left_line is None or right_line is None:
            continue

        lm, lb = left_line
        rm, rb = right_line
        ref_y = get_y_ref(mask_bin)
        x_left = int(lm * ref_y + lb)
        x_right = int(rm * ref_y + rb)
        X_L = rx + x_left
        X_R = rx + x_right
        Y = ry + ref_y
        diff = X_R - X_L
        result_data = (X_L, Y, X_R, Y, diff)

        if return_vis:
            roi_vis = vis[ry:ry + rh, rx:rx + rw]
            cv2.line(roi_vis, (int(lb), 0), (int(lm * rh + lb), rh), (0, 0, 255), 3)
            cv2.line(roi_vis, (int(rb), 0), (int(rm * rh + rb), rh), (255, 0, 0), 3)
            cv2.line(roi_vis, (0, ref_y), (rw, ref_y), (0, 255, 255), 2)
            cv2.circle(roi_vis, (x_left, ref_y), 6, (0, 0, 255), -1)
            cv2.circle(roi_vis, (x_right, ref_y), 6, (255, 0, 0), -1)
            cv2.putText(roi_vis, f"diff={diff}px", (10, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

    return result_data, vis


# ---------------------------
# main (smoke test)
# ---------------------------
if __name__ == "__main__":
    RKNN_MODEL_PATH = "61seg.rknn"
    IMAGE_PATH = "./test_image/33.png"

    init_rknn_model(RKNN_MODEL_PATH)
    img = cv2.imread(IMAGE_PATH)
    if img is None:
        raise FileNotFoundError(f"无法读取图片: {IMAGE_PATH}")

    result_data, vis_img = caculate_yemian_diff(img, return_vis=True)
    if result_data:
        XL, YL, XR, YR, diff = result_data
        print(f"左交点: ({XL},{YL}), 右交点: ({XR},{YR}), diff={diff}px")
    if vis_img is not None:
        cv2.imwrite("vis_output.png", vis_img)
        print("可视化结果保存到 vis_output.png")
/dev/null @@ -1,178 +0,0 @@ -import os -from pathlib import Path -import cv2 -import numpy as np -import platform -from rknnlite.api import RKNNLite - -# --------------------------- -# 类别映射 -# --------------------------- -CLASS_NAMES = { - 0: "未堆料", - 1: "小堆料", - 2: "大堆料", - 3: "未浇筑满", - 4: "浇筑满" -} - -# --------------------------- -# RKNN 全局实例(只加载一次) -# --------------------------- -_global_rknn = None -DEVICE_COMPATIBLE_NODE = '/proc/device-tree/compatible' - - -# ===================================================== -# RKNN MODEL -# ===================================================== -def init_rknn_model(model_path): - global _global_rknn - if _global_rknn is not None: - return _global_rknn - - rknn = RKNNLite(verbose=False) - - ret = rknn.load_rknn(model_path) - if ret != 0: - raise RuntimeError(f"Load RKNN failed: {ret}") - - ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0) - if ret != 0: - raise RuntimeError(f"Init runtime failed: {ret}") - - _global_rknn = rknn - print(f"[INFO] RKNN 模型加载成功: {model_path}") - return rknn - - -# --------------------------- -# 图像预处理(统一 640×640) -# --------------------------- -def preprocess(img, size=(640, 640)): - img = cv2.resize(img, size) - img = np.expand_dims(img, 0) - return img - - -# --------------------------- -# 单次 RKNN 分类 -# --------------------------- -def rknn_classify(img_resized, model_path): - rknn = init_rknn_model(model_path) - input_tensor = preprocess(img_resized) - outs = rknn.inference([input_tensor]) - - pred = outs[0].reshape(-1) - class_id = int(np.argmax(pred)) - return class_id, pred.astype(float) - - -# ===================================================== -# ROI 逻辑 -# ===================================================== -def load_single_roi(txt_path): - """ - 只加载第一个 ROI - 格式: x,y,w,h - """ - if not os.path.exists(txt_path): - raise RuntimeError(f"ROI 文件不存在: {txt_path}") - - with open(txt_path) as f: - for line in f: - s = line.strip() - if not s: - continue - try: - x, y, w, h = map(int, 
s.split(',')) - return (x, y, w, h) - except: - raise RuntimeError(f"❌ ROI 格式错误: {s}") - - raise RuntimeError("❌ ROI 文件为空") - - -def crop_and_resize_single(img, roi, target_size=640): - x, y, w, h = roi - h_img, w_img = img.shape[:2] - - if x < 0 or y < 0 or x + w > w_img or y + h > h_img: - raise RuntimeError(f"ROI 超出图像范围: {roi}") - - roi_img = img[y:y + h, x:x + w] - roi_resized = cv2.resize(roi_img, (target_size, target_size), interpolation=cv2.INTER_AREA) - return roi_resized - - -# ===================================================== -# class1/class2 加权分类增强 -# ===================================================== -def weighted_small_large(pred, threshold=0.4, w1=0.3, w2=0.7): - p1 = float(pred[1]) - p2 = float(pred[2]) - total = p1 + p2 - - score = (w1 * p1 + w2 * p2) / total if total > 0 else 0.0 - final_class = "大堆料" if score >= threshold else "小堆料" - - return final_class, score, p1, p2 - - -# ===================================================== -# 只处理一个 ROI -# ===================================================== -def classify_frame_with_single_roi(model_path, frame, roi_file, threshold=0.4): - """ - 输入: - - frame: BGR 图像 - - model_path: RKNN 模型 - - roi_file: 只包含一个 ROI 的 txt 文件 - - threshold: class1/class2 判断阈值 - - 输出: - { "class": 类别, "score": x, "p1": x, "p2": x } - """ - - if frame is None or not isinstance(frame, np.ndarray): - raise RuntimeError("❌ classify_frame_with_single_roi 传入的 frame 无效") - - # ------- 只加载第一个 ROI ------- - roi = load_single_roi(roi_file) - - # ------- 裁剪并 resize ------- - roi_img = crop_and_resize_single(frame, roi) - - # ------- RKNN 推理 ------- - class_id, pred = rknn_classify(roi_img, model_path) - class_name = CLASS_NAMES.get(class_id, f"未知类别({class_id})") - - # ------- class1/class2 加权处理 ------- - if class_id in [1, 2]: - final_class, score, p1, p2 = weighted_small_large(pred, threshold) - else: - final_class = class_name - score = float(pred[class_id]) - p1, p2 = float(pred[1]), float(pred[2]) - - return { - "class": 
final_class, - "score": round(score, 4), - "p1": round(p1, 4), - "p2": round(p2, 4) - } - - -# ===================================================== -# 示例调用 -# ===================================================== -if __name__ == "__main__": - model_path = "yiliao_cls.rknn" - roi_file = "./roi_coordinates/1_rois.txt" - - frame = cv2.imread("./test_image/1.png") - - result = classify_frame_with_single_roi(model_path, frame, roi_file) - - print(result) - diff --git a/zhuangtai_class_cls_1980x1080_60/.idea/.gitignore b/zhuangtai_class_cls_1980x1080_60/.idea/.gitignore new file mode 100644 index 0000000..10b731c --- /dev/null +++ b/zhuangtai_class_cls_1980x1080_60/.idea/.gitignore @@ -0,0 +1,5 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 基于编辑器的 HTTP 客户端请求 +/httpRequests/ diff --git a/zhuangtai_class_cls_1980x1080_60/.idea/inspectionProfiles/profiles_settings.xml b/zhuangtai_class_cls_1980x1080_60/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/zhuangtai_class_cls_1980x1080_60/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/zhuangtai_class_cls_1980x1080_60/.idea/misc.xml b/zhuangtai_class_cls_1980x1080_60/.idea/misc.xml new file mode 100644 index 0000000..9de2865 --- /dev/null +++ b/zhuangtai_class_cls_1980x1080_60/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/zhuangtai_class_cls_1980x1080/.idea/modules.xml b/zhuangtai_class_cls_1980x1080_60/.idea/modules.xml similarity index 100% rename from zhuangtai_class_cls_1980x1080/.idea/modules.xml rename to zhuangtai_class_cls_1980x1080_60/.idea/modules.xml diff --git a/zhuangtai_class_cls_1980x1080_60/.idea/zhuangtai_class_cls.iml b/zhuangtai_class_cls_1980x1080_60/.idea/zhuangtai_class_cls.iml new file mode 100644 index 0000000..fa7a615 --- /dev/null +++ b/zhuangtai_class_cls_1980x1080_60/.idea/zhuangtai_class_cls.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline 
at end of file diff --git a/zhuangtai_class_cls_1980x1080/README.md b/zhuangtai_class_cls_1980x1080_60/README.md similarity index 100% rename from zhuangtai_class_cls_1980x1080/README.md rename to zhuangtai_class_cls_1980x1080_60/README.md diff --git a/zhuangtai_class_cls_1980x1080_60/roi_coordinates/60_rois.txt b/zhuangtai_class_cls_1980x1080_60/roi_coordinates/60_rois.txt new file mode 100644 index 0000000..8627e04 --- /dev/null +++ b/zhuangtai_class_cls_1980x1080_60/roi_coordinates/60_rois.txt @@ -0,0 +1 @@ +604,182,594,252 diff --git a/zhuangtai_class_cls_1980x1080_60/test_image/1.png b/zhuangtai_class_cls_1980x1080_60/test_image/1.png new file mode 100644 index 0000000..7b70cb0 Binary files /dev/null and b/zhuangtai_class_cls_1980x1080_60/test_image/1.png differ diff --git a/zhuangtai_class_cls_1980x1080_60/test_image/2.png b/zhuangtai_class_cls_1980x1080_60/test_image/2.png new file mode 100644 index 0000000..7c35f5a Binary files /dev/null and b/zhuangtai_class_cls_1980x1080_60/test_image/2.png differ diff --git a/zhuangtai_class_cls_1980x1080_60/test_image/3.png b/zhuangtai_class_cls_1980x1080_60/test_image/3.png new file mode 100644 index 0000000..2134ed9 Binary files /dev/null and b/zhuangtai_class_cls_1980x1080_60/test_image/3.png differ diff --git a/zhuangtai_class_cls_1980x1080_60/test_image/4.png b/zhuangtai_class_cls_1980x1080_60/test_image/4.png new file mode 100644 index 0000000..025ff08 Binary files /dev/null and b/zhuangtai_class_cls_1980x1080_60/test_image/4.png differ diff --git a/zhuangtai_class_cls_1980x1080_60/test_image/5.png b/zhuangtai_class_cls_1980x1080_60/test_image/5.png new file mode 100644 index 0000000..d7df6bc Binary files /dev/null and b/zhuangtai_class_cls_1980x1080_60/test_image/5.png differ diff --git a/zhuangtai_class_cls_1980x1080_60/test_image/6.png b/zhuangtai_class_cls_1980x1080_60/test_image/6.png new file mode 100644 index 0000000..1da345a Binary files /dev/null and b/zhuangtai_class_cls_1980x1080_60/test_image/6.png 
differ diff --git a/zhuangtai_class_cls_1980x1080_60/yiliao_cls60-old.rknn b/zhuangtai_class_cls_1980x1080_60/yiliao_cls60-old.rknn new file mode 100644 index 0000000..caa4852 Binary files /dev/null and b/zhuangtai_class_cls_1980x1080_60/yiliao_cls60-old.rknn differ diff --git a/zhuangtai_class_cls_1980x1080_60/yiliao_cls60.rknn b/zhuangtai_class_cls_1980x1080_60/yiliao_cls60.rknn new file mode 100644 index 0000000..d691d36 Binary files /dev/null and b/zhuangtai_class_cls_1980x1080_60/yiliao_cls60.rknn differ diff --git a/zhuangtai_class_cls_1980x1080/yiliao_main_pc.py b/zhuangtai_class_cls_1980x1080_60/yiliao_main_pc.py similarity index 100% rename from zhuangtai_class_cls_1980x1080/yiliao_main_pc.py rename to zhuangtai_class_cls_1980x1080_60/yiliao_main_pc.py diff --git a/zhuangtai_class_cls_1980x1080_60/yiliao_main_rknn.py b/zhuangtai_class_cls_1980x1080_60/yiliao_main_rknn.py new file mode 100644 index 0000000..87cc10b --- /dev/null +++ b/zhuangtai_class_cls_1980x1080_60/yiliao_main_rknn.py @@ -0,0 +1,147 @@ +import os +import cv2 +import numpy as np +from rknnlite.api import RKNNLite + +# --------------------------- +# 类别映射 +# --------------------------- +CLASS_NAMES = { + 0: "未堆料", + 1: "小堆料", + 2: "大堆料", + 3: "未浇筑满", + 4: "浇筑满" +} + +# --------------------------- +# RKNN 全局实例(只加载一次) +# --------------------------- +_global_rknn = None + + +def init_rknn_model(model_path): + global _global_rknn + if _global_rknn is not None: + return _global_rknn + + rknn = RKNNLite(verbose=False) + ret = rknn.load_rknn(model_path) + if ret != 0: + raise RuntimeError(f"Load RKNN failed: {ret}") + + ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0) + if ret != 0: + raise RuntimeError(f"Init runtime failed: {ret}") + + _global_rknn = rknn + print(f"[INFO] RKNN 模型加载成功: {model_path}") + return rknn + + +# --------------------------- +# 预处理 +# --------------------------- +def letterbox(image, new_size=640, color=(114, 114, 114)): + h, w = image.shape[:2] + scale = min(new_size 
/ h, new_size / w) + nh, nw = int(h * scale), int(w * scale) + resized = cv2.resize(image, (nw, nh)) + new_img = np.full((new_size, new_size, 3), color, dtype=np.uint8) + top = (new_size - nh) // 2 + left = (new_size - nw) // 2 + new_img[top:top + nh, left:left + nw] = resized + return new_img + + +def resize_stretch(image, size=640): + return cv2.resize(image, (size, size)) + + +def preprocess_image_for_rknn(img, size=640, resize_mode="stretch", to_rgb=False, normalize=False, layout="NHWC"): + if resize_mode == "letterbox": + img_box = letterbox(img, new_size=size) + else: + img_box = resize_stretch(img, size=size) + if to_rgb: + img_box = cv2.cvtColor(img_box, cv2.COLOR_BGR2RGB) + img_f = img_box.astype(np.float32) + if normalize: + img_f /= 255.0 + if layout == "NHWC": + out = np.expand_dims(img_f, axis=0) + else: + out = np.expand_dims(np.transpose(img_f, (2, 0, 1)), axis=0) + return out.astype(np.float32) + + +# --------------------------- +# 单次 RKNN 推理 +# --------------------------- +def rknn_classify_preprocessed(input_tensor, model_path): + rknn = init_rknn_model(model_path) + input_tensor = np.ascontiguousarray(input_tensor.astype(np.float32)) + outs = rknn.inference([input_tensor]) + pred = outs[0].reshape(-1).astype(float) + class_id = int(np.argmax(pred)) + return class_id, pred + + +# --------------------------- +# ROI +# --------------------------- +def load_single_roi(txt_path): + if not os.path.exists(txt_path): + raise RuntimeError(f"ROI 文件不存在: {txt_path}") + with open(txt_path) as f: + for line in f: + s = line.strip() + if not s: continue + x, y, w, h = map(int, s.split(',')) + return (x, y, w, h) + raise RuntimeError("ROI 文件为空") + + +def crop_and_return_roi(img, roi): + x, y, w, h = roi + h_img, w_img = img.shape[:2] + if x < 0 or y < 0 or x + w > w_img or y + h > h_img: + raise RuntimeError(f"ROI 超出图像范围: {roi}") + return img[y:y + h, x:x + w] + + +# --------------------------- +# 单张图片推理 +# --------------------------- +def 
classify_single_image(frame, model_path, roi_file, + size=640, resize_mode="stretch", + to_rgb=True, normalize=False, layout="NHWC"): + if frame is None: + raise FileNotFoundError("❌ 输入帧为空.") + + roi = load_single_roi(roi_file) + roi_img = crop_and_return_roi(frame, roi) + input_tensor = preprocess_image_for_rknn(roi_img, size=size, resize_mode=resize_mode, + to_rgb=to_rgb, normalize=normalize, layout=layout) + + class_id, pred = rknn_classify_preprocessed(input_tensor, model_path) + class_name = CLASS_NAMES.get(class_id, f"未知类别({class_id})") + + return {"class": class_name, "score": round(float(pred[class_id]), 4), "raw": pred.tolist()} + + +# --------------------------- +# 示例调用 +# --------------------------- +if __name__ == "__main__": + model_path = "yiliao_cls60.rknn" + roi_file = "./roi_coordinates/60_rois.txt" + image_path = "./test_image/5.png" + + # 使用OpenCV读取图像 + frame = cv2.imread(image_path) + if frame is None: + raise FileNotFoundError(f"❌ 无法读取图片: {image_path}") + + result = classify_single_image(frame, model_path, roi_file) + print("[RESULT]", result) diff --git a/zhuangtai_class_cls_1980x1080_60/yiliao_main_rknn_jiaquan.py b/zhuangtai_class_cls_1980x1080_60/yiliao_main_rknn_jiaquan.py new file mode 100644 index 0000000..c07fe8b --- /dev/null +++ b/zhuangtai_class_cls_1980x1080_60/yiliao_main_rknn_jiaquan.py @@ -0,0 +1,163 @@ +import os +import cv2 +import numpy as np +from rknnlite.api import RKNNLite + +# --------------------------- +# 类别映射 +# --------------------------- +CLASS_NAMES = { + 0: "未堆料", + 1: "小堆料", + 2: "大堆料", + 3: "未浇筑满", + 4: "浇筑满" +} + +# --------------------------- +# RKNN 全局实例(只加载一次) +# --------------------------- +_global_rknn = None + +def init_rknn_model(model_path): + global _global_rknn + if _global_rknn is not None: + return _global_rknn + + rknn = RKNNLite(verbose=False) + ret = rknn.load_rknn(model_path) + if ret != 0: + raise RuntimeError(f"Load RKNN failed: {ret}") + + ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0) 
+ if ret != 0: + raise RuntimeError(f"Init runtime failed: {ret}") + + _global_rknn = rknn + print(f"[INFO] RKNN 模型加载成功: {model_path}") + return rknn + +# --------------------------- +# 预处理 +# --------------------------- +def letterbox(image, new_size=640, color=(114,114,114)): + h, w = image.shape[:2] + scale = min(new_size/h, new_size/w) + nh, nw = int(h*scale), int(w*scale) + resized = cv2.resize(image, (nw, nh)) + new_img = np.full((new_size, new_size,3), color, dtype=np.uint8) + top = (new_size-nh)//2 + left = (new_size-nw)//2 + new_img[top:top+nh, left:left+nw] = resized + return new_img + +def resize_stretch(image, size=640): + return cv2.resize(image, (size, size)) + +def preprocess_image_for_rknn(img, size=640, resize_mode="stretch", to_rgb=False, normalize=False, layout="NHWC"): + if resize_mode=="letterbox": + img_box = letterbox(img, new_size=size) + else: + img_box = resize_stretch(img, size=size) + if to_rgb: + img_box = cv2.cvtColor(img_box, cv2.COLOR_BGR2RGB) + img_f = img_box.astype(np.float32) + if normalize: + img_f /= 255.0 + if layout=="NHWC": + out = np.expand_dims(img_f, axis=0) + else: + out = np.expand_dims(np.transpose(img_f,(2,0,1)), axis=0) + return out.astype(np.float32) + +# --------------------------- +# 单次 RKNN 推理 +# --------------------------- +def rknn_classify_preprocessed(input_tensor, model_path): + rknn = init_rknn_model(model_path) + input_tensor = np.ascontiguousarray(input_tensor.astype(np.float32)) + outs = rknn.inference([input_tensor]) + pred = outs[0].reshape(-1).astype(float) + class_id = int(np.argmax(pred)) + return class_id, pred + +# --------------------------- +# ROI +# --------------------------- +def load_single_roi(txt_path): + if not os.path.exists(txt_path): + raise RuntimeError(f"ROI 文件不存在: {txt_path}") + with open(txt_path) as f: + for line in f: + s = line.strip() + if not s: continue + x,y,w,h = map(int, s.split(',')) + return (x,y,w,h) + raise RuntimeError("ROI 文件为空") + +def crop_and_return_roi(img, roi): 
+ x,y,w,h = roi + h_img, w_img = img.shape[:2] + if x<0 or y<0 or x+w>w_img or y+h>h_img: + raise RuntimeError(f"ROI 超出图像范围: {roi}") + return img[y:y+h, x:x+w] + +# --------------------------- +# class1/class2 加权 +# --------------------------- +def weighted_small_large(pred, threshold=0.4, w1=0.3, w2=0.7): + p1,p2 = float(pred[1]), float(pred[2]) + total = p1+p2 + score = (w1*p1 + w2*p2)/total if total>0 else 0.0 + final_class = "大堆料" if score>=threshold else "小堆料" + return final_class, score, p1, p2 + +# --------------------------- +# 单张图片推理(接收 NumPy 图像) +# --------------------------- +def classify_single_image(frame, model_path, roi_file, + threshold=0.4, + size=640, resize_mode="stretch", + to_rgb=True, normalize=False, layout="NHWC"): + if frame is None: + raise ValueError("❌ 输入图像为空") + + roi = load_single_roi(roi_file) + roi_img = crop_and_return_roi(frame, roi) + input_tensor = preprocess_image_for_rknn(roi_img, size=size, resize_mode=resize_mode, + to_rgb=to_rgb, normalize=normalize, layout=layout) + + class_id, pred = rknn_classify_preprocessed(input_tensor, model_path) + class_name = CLASS_NAMES.get(class_id, f"未知类别({class_id})") + + if class_id in [1, 2]: + final_class, score, p1, p2 = weighted_small_large(pred, threshold) + else: + final_class = class_name + score = float(pred[class_id]) + p1, p2 = float(pred[1]), float(pred[2]) + + return { + "class": final_class, + "score": round(score, 4), + "p1": round(p1, 4), + "p2": round(p2, 4), + "raw": pred.tolist() + } + +# --------------------------- +# 示例调用 +# --------------------------- +if __name__ == "__main__": + model_path = "yiliao_cls.rknn" + roi_file = "./roi_coordinates/1_rois.txt" + image_path = "./test_image/1.png" + + # 使用 OpenCV 读取图像(返回 NumPy 数组) + frame = cv2.imread(image_path) + if frame is None: + raise FileNotFoundError(f"❌ 无法读取图片: {image_path}") + + # 调用推理函数,传入图像数组 + result = classify_single_image(frame, model_path, roi_file) + print("[RESULT]", result) diff --git 
a/zhuangtai_class_cls_1980x1080_60/yiliao_main_rknn_withoutjiaquan.py b/zhuangtai_class_cls_1980x1080_60/yiliao_main_rknn_withoutjiaquan.py new file mode 100644 index 0000000..5d9a164 --- /dev/null +++ b/zhuangtai_class_cls_1980x1080_60/yiliao_main_rknn_withoutjiaquan.py @@ -0,0 +1,138 @@ +import os +import cv2 +import numpy as np +from rknnlite.api import RKNNLite + +# --------------------------- +# 类别映射 +# --------------------------- +CLASS_NAMES = { + 0: "未堆料", + 1: "小堆料", + 2: "大堆料", + 3: "未浇筑满", + 4: "浇筑满" +} + +# --------------------------- +# RKNN 全局实例(只加载一次) +# --------------------------- +_global_rknn = None + +def init_rknn_model(model_path): + global _global_rknn + if _global_rknn is not None: + return _global_rknn + + rknn = RKNNLite(verbose=False) + ret = rknn.load_rknn(model_path) + if ret != 0: + raise RuntimeError(f"Load RKNN failed: {ret}") + + ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0) + if ret != 0: + raise RuntimeError(f"Init runtime failed: {ret}") + + _global_rknn = rknn + print(f"[INFO] RKNN 模型加载成功: {model_path}") + return rknn + +# --------------------------- +# 预处理 +# --------------------------- +def letterbox(image, new_size=640, color=(114,114,114)): + h, w = image.shape[:2] + scale = min(new_size/h, new_size/w) + nh, nw = int(h*scale), int(w*scale) + resized = cv2.resize(image, (nw, nh)) + new_img = np.full((new_size, new_size,3), color, dtype=np.uint8) + top = (new_size-nh)//2 + left = (new_size-nw)//2 + new_img[top:top+nh, left:left+nw] = resized + return new_img + +def resize_stretch(image, size=640): + return cv2.resize(image, (size, size)) + +def preprocess_image_for_rknn(img, size=640, resize_mode="stretch", to_rgb=False, normalize=False, layout="NHWC"): + if resize_mode=="letterbox": + img_box = letterbox(img, new_size=size) + else: + img_box = resize_stretch(img, size=size) + if to_rgb: + img_box = cv2.cvtColor(img_box, cv2.COLOR_BGR2RGB) + img_f = img_box.astype(np.float32) + if normalize: + img_f /= 255.0 + if 
layout=="NHWC": + out = np.expand_dims(img_f, axis=0) + else: + out = np.expand_dims(np.transpose(img_f,(2,0,1)), axis=0) + return out.astype(np.float32) + +# --------------------------- +# 单次 RKNN 推理 +# --------------------------- +def rknn_classify_preprocessed(input_tensor, model_path): + rknn = init_rknn_model(model_path) + input_tensor = np.ascontiguousarray(input_tensor.astype(np.float32)) + outs = rknn.inference([input_tensor]) + pred = outs[0].reshape(-1).astype(float) + class_id = int(np.argmax(pred)) + return class_id, pred + +# --------------------------- +# ROI +# --------------------------- +def load_single_roi(txt_path): + if not os.path.exists(txt_path): + raise RuntimeError(f"ROI 文件不存在: {txt_path}") + with open(txt_path) as f: + for line in f: + s = line.strip() + if not s: continue + x,y,w,h = map(int, s.split(',')) + return (x,y,w,h) + raise RuntimeError("ROI 文件为空") + +def crop_and_return_roi(img, roi): + x,y,w,h = roi + h_img, w_img = img.shape[:2] + if x<0 or y<0 or x+w>w_img or y+h>h_img: + raise RuntimeError(f"ROI 超出图像范围: {roi}") + return img[y:y+h, x:x+w] + +# --------------------------- +# 单张图片推理 +# --------------------------- +def classify_single_image(frame, model_path, roi_file, + size=640, resize_mode="stretch", + to_rgb=True, normalize=False, layout="NHWC"): + if frame is None: + raise FileNotFoundError("❌ 输入帧为空.") + + roi = load_single_roi(roi_file) + roi_img = crop_and_return_roi(frame, roi) + input_tensor = preprocess_image_for_rknn(roi_img, size=size, resize_mode=resize_mode, + to_rgb=to_rgb, normalize=normalize, layout=layout) + + class_id, pred = rknn_classify_preprocessed(input_tensor, model_path) + class_name = CLASS_NAMES.get(class_id, f"未知类别({class_id})") + + return {"class":class_name, "score":round(float(pred[class_id]),4), "raw":pred.tolist()} + +# --------------------------- +# 示例调用 +# --------------------------- +if __name__=="__main__": + model_path = "yiliao_cls.rknn" + roi_file = "./roi_coordinates/1_rois.txt" + 
image_path = "./test_image/5.png" + + # 使用OpenCV读取图像 + frame = cv2.imread(image_path) + if frame is None: + raise FileNotFoundError(f"❌ 无法读取图片: {image_path}") + + result = classify_single_image(frame, model_path, roi_file) + print("[RESULT]", result) diff --git a/zhuangtai_class_cls_1980x1080_61/.idea/.gitignore b/zhuangtai_class_cls_1980x1080_61/.idea/.gitignore new file mode 100644 index 0000000..10b731c --- /dev/null +++ b/zhuangtai_class_cls_1980x1080_61/.idea/.gitignore @@ -0,0 +1,5 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 基于编辑器的 HTTP 客户端请求 +/httpRequests/ diff --git a/zhuangtai_class_cls_1980x1080_61/.idea/inspectionProfiles/profiles_settings.xml b/zhuangtai_class_cls_1980x1080_61/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/zhuangtai_class_cls_1980x1080_61/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/zhuangtai_class_cls_1980x1080_61/.idea/misc.xml b/zhuangtai_class_cls_1980x1080_61/.idea/misc.xml new file mode 100644 index 0000000..9de2865 --- /dev/null +++ b/zhuangtai_class_cls_1980x1080_61/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/zhuangtai_class_cls_1980x1080_61/.idea/modules.xml b/zhuangtai_class_cls_1980x1080_61/.idea/modules.xml new file mode 100644 index 0000000..23eb72c --- /dev/null +++ b/zhuangtai_class_cls_1980x1080_61/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/zhuangtai_class_cls_1980x1080_61/.idea/zhuangtai_class_cls.iml b/zhuangtai_class_cls_1980x1080_61/.idea/zhuangtai_class_cls.iml new file mode 100644 index 0000000..fa7a615 --- /dev/null +++ b/zhuangtai_class_cls_1980x1080_61/.idea/zhuangtai_class_cls.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/zhuangtai_class_cls_1980x1080_61/README.md b/zhuangtai_class_cls_1980x1080_61/README.md new file mode 100644 index 0000000..f4ce027 --- 
/dev/null +++ b/zhuangtai_class_cls_1980x1080_61/README.md @@ -0,0 +1,78 @@ +# RKNN 堆料分类推理系统 README + +本项目用于在 RK3588 平台上运行 RKNN 分类模型,对多个 ROI 区域进行堆料状态分类,包括: + +未堆料 0 +小堆料 1 +大堆料 2 +未浇筑满 3 +浇筑满 4 + +项目中支持 多 ROI 裁剪、模型推理、加权判断(小/大堆料) 和分类结果输出。 + +## 目录结构 + +project/ +│── yiliao_cls.rknn # RKNN 模型 +│── best.pt # pt 模型 +│── roi_coordinates/ # ROI 坐标文件目录 +│ └── 1_rois.txt +│── test_image/ # 测试图片目录 +│ └── 1.jpg + └── 2.jpg + └── 3.jpg +│── yiliao_main_rknn.py # RKNN主推理脚本 +│── yiliao_main_pc.py # PC推理脚本 +│── README.md + + +## 配置(略) +## 安装依赖(略) + + +## 调用示例 +单张图片推理调用示例 + +```bash + +from yiliao_main_rknn import classify_frame_with_rois + +# 示例调用 +# ===================================================== +if __name__ == "__main__": + model_path = "yiliao_cls.rknn" + roi_file = "./roi_coordinates/1_rois.txt" + + frame = cv2.imread("./test_image/1.png") + + result = classify_frame_with_single_roi(model_path, frame, roi_file) + + print(result) + +``` + +##小堆料 / 大堆料加权判定说明 + +模型原始输出中,小堆料(class 1)与大堆料(class 2)相比时容易出现概率接近的情况。 + +通过加权机制: + +✔ 可以避免因整体概率偏低导致分类不稳定 +✔ 优先放大“大堆料 的可能性”(因为 w2 > w1) +✔ score 更能反映堆料大小的趋势,而不是绝对概率 + +为提高判断稳定性,采用了加权评分方式:(这些参数都可以根据实际情况在文件中对weighted_small_large中参数进行修改) +score = (0.3 * p1 + 0.7 * p2) / (p1 + p2) +score ≥ 0.4 → 大堆料 +score < 0.4 → 小堆料 + +p1:小堆料概率 +p2:大堆料概率 +score 越接近 1 越倾向于大堆料 +score 越接近 0 越倾向于小堆料 + + + + + + diff --git a/zhuangtai_class_cls_1980x1080/roi_coordinates/1_rois.txt b/zhuangtai_class_cls_1980x1080_61/roi_coordinates/61_rois.txt similarity index 100% rename from zhuangtai_class_cls_1980x1080/roi_coordinates/1_rois.txt rename to zhuangtai_class_cls_1980x1080_61/roi_coordinates/61_rois.txt diff --git a/zhuangtai_class_cls_1980x1080_61/test_image/1.png b/zhuangtai_class_cls_1980x1080_61/test_image/1.png new file mode 100644 index 0000000..2a4c35a Binary files /dev/null and b/zhuangtai_class_cls_1980x1080_61/test_image/1.png differ diff --git a/zhuangtai_class_cls_1980x1080/test_image/2.png b/zhuangtai_class_cls_1980x1080_61/test_image/2.png 
similarity index 100% rename from zhuangtai_class_cls_1980x1080/test_image/2.png rename to zhuangtai_class_cls_1980x1080_61/test_image/2.png diff --git a/zhuangtai_class_cls_1980x1080_61/yiliao_cls61.rknn b/zhuangtai_class_cls_1980x1080_61/yiliao_cls61.rknn new file mode 100644 index 0000000..11f7e11 Binary files /dev/null and b/zhuangtai_class_cls_1980x1080_61/yiliao_cls61.rknn differ diff --git a/zhuangtai_class_cls_1980x1080_61/yiliao_main_pc.py b/zhuangtai_class_cls_1980x1080_61/yiliao_main_pc.py new file mode 100644 index 0000000..539ff30 --- /dev/null +++ b/zhuangtai_class_cls_1980x1080_61/yiliao_main_pc.py @@ -0,0 +1,168 @@ +import os +from pathlib import Path +import cv2 +import numpy as np +from ultralytics import YOLO + +# --------------------------- +# 类别映射 +# --------------------------- +CLASS_NAMES = { + 0: "未堆料", + 1: "小堆料", + 2: "大堆料", + 3: "未浇筑满", + 4: "浇筑满" +} + +# --------------------------- +# 加载 ROI 列表 +# --------------------------- +def load_global_rois(txt_path): + rois = [] + if not os.path.exists(txt_path): + print(f"❌ ROI 文件不存在: {txt_path}") + return rois + with open(txt_path, 'r') as f: + for line in f: + s = line.strip() + if s: + try: + x, y, w, h = map(int, s.split(',')) + rois.append((x, y, w, h)) + except Exception as e: + print(f"无法解析 ROI 行 '{s}': {e}") + return rois + +# --------------------------- +# 裁剪并 resize ROI +# --------------------------- +def crop_and_resize(img, rois, target_size=640): + crops = [] + h_img, w_img = img.shape[:2] + for i, (x, y, w, h) in enumerate(rois): + if x < 0 or y < 0 or x + w > w_img or y + h > h_img: + continue + roi = img[y:y+h, x:x+w] + roi_resized = cv2.resize(roi, (target_size, target_size), interpolation=cv2.INTER_AREA) + crops.append((roi_resized, i)) + return crops + +# --------------------------- +# class1/class2 加权判断 +# --------------------------- +def weighted_small_large(pred_probs, threshold=0.4, w1=0.3, w2=0.7): + p1 = float(pred_probs[1]) + p2 = float(pred_probs[2]) + total = p1 + p2 + 
if total > 0: + score = (w1 * p1 + w2 * p2) / total + else: + score = 0.0 + final_class = "大堆料" if score >= threshold else "小堆料" + return final_class, score, p1, p2 + +# --------------------------- +# 单张图片推理函数 +# --------------------------- +def classify_image_weighted(image, model, threshold=0.4): + results = model(image) + pred_probs = results[0].probs.data.cpu().numpy().flatten() + class_id = int(pred_probs.argmax()) + confidence = float(pred_probs[class_id]) + class_name = CLASS_NAMES.get(class_id, f"未知类别({class_id})") + + # class1/class2 使用加权得分 + if class_id in [1, 2]: + final_class, score, p1, p2 = weighted_small_large(pred_probs, threshold=threshold) + else: + final_class = class_name + score = confidence + p1 = float(pred_probs[1]) + p2 = float(pred_probs[2]) + + return final_class, score, p1, p2 + +# --------------------------- +# 批量推理主函数 +# --------------------------- +def batch_classify_images(model_path, input_folder, output_root, roi_file, target_size=640, threshold=0.5): + # 加载模型 + model = YOLO(model_path) + + # 确保输出根目录存在 + output_root = Path(output_root) + output_root.mkdir(parents=True, exist_ok=True) + + # 为所有类别创建目录 + class_dirs = {} + for name in CLASS_NAMES.values(): + d = output_root / name + d.mkdir(exist_ok=True) + class_dirs[name] = d + + rois = load_global_rois(roi_file) + if not rois: + print("❌ 没有有效 ROI,退出") + return + + # 遍历图片 + for img_path in Path(input_folder).glob("*.*"): + if img_path.suffix.lower() not in ['.jpg', '.jpeg', '.png', '.bmp', '.tif']: + continue + try: + img = cv2.imread(str(img_path)) + if img is None: + continue + + crops = crop_and_resize(img, rois, target_size) + + for roi_resized, roi_idx in crops: + final_class, score, p1, p2 = classify_image_weighted(roi_resized, model, threshold=threshold) + + # 文件名中保存 ROI、类别、加权分数、class1/class2 置信度 + suffix = f"_roi{roi_idx}_{final_class}_score{score:.2f}_p1{p1:.2f}_p2{p2:.2f}" + dst_path = class_dirs[final_class] / f"{img_path.stem}{suffix}{img_path.suffix}" + 
cv2.imwrite(dst_path, roi_resized) + print(f"{img_path.name}{suffix} -> {final_class} (score={score:.2f}, p1={p1:.2f}, p2={p2:.2f})") + + except Exception as e: + print(f"处理失败 {img_path.name}: {e}") + + +# --------------------------- +# 单张图片使用示例(保留 ROI,不保存文件) +# --------------------------- +if __name__ == "__main__": + model_path = r"best.pt" + image_path = r"./test_image/2.jpg" # 单张图片路径 + roi_file = r"./roi_coordinates/1_rois.txt" + target_size = 640 + threshold = 0.4 #加权得分阈值可以根据大小堆料分类结果进行调整 + + # 加载模型 + model = YOLO(model_path) + + # 读取 ROI + rois = load_global_rois(roi_file) + if not rois: + print("❌ 没有有效 ROI,退出") + exit(1) + + # 读取图片 + img = cv2.imread(image_path) + if img is None: + print(f"❌ 无法读取图片: {image_path}") + exit(1) + + # 注意:必须裁剪 ROI 并推理,因为训练的时候输入的图像是经过resize的 + crops = crop_and_resize(img, rois, target_size) + for roi_resized, roi_idx in crops: + #final_class, score, p1, p2 = classify_image_weighted(roi_resized, model, threshold=threshold) + final_class,_,_,_ = classify_image_weighted(roi_resized, model, threshold=threshold) + # 只输出信息,不保存文件 + #print(f"ROI {roi_idx} -> 类别: {final_class}, 加权分数: {score:.2f}, " + #f"class1 置信度: {p1:.2f}, class2 置信度: {p2:.2f}") + print(f"类别: {final_class}") + + diff --git a/zhuangtai_class_cls_1980x1080_61/yiliao_main_rknn.py b/zhuangtai_class_cls_1980x1080_61/yiliao_main_rknn.py new file mode 100644 index 0000000..cbd1e6d --- /dev/null +++ b/zhuangtai_class_cls_1980x1080_61/yiliao_main_rknn.py @@ -0,0 +1,163 @@ +import os +import cv2 +import numpy as np +from rknnlite.api import RKNNLite + +# --------------------------- +# 类别映射 +# --------------------------- +CLASS_NAMES = { + 0: "未堆料", + 1: "小堆料", + 2: "大堆料", + 3: "未浇筑满", + 4: "浇筑满" +} + +# --------------------------- +# RKNN 全局实例(只加载一次) +# --------------------------- +_global_rknn = None + +def init_rknn_model(model_path): + global _global_rknn + if _global_rknn is not None: + return _global_rknn + + rknn = RKNNLite(verbose=False) + ret = rknn.load_rknn(model_path) + 
if ret != 0: + raise RuntimeError(f"Load RKNN failed: {ret}") + + ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0) + if ret != 0: + raise RuntimeError(f"Init runtime failed: {ret}") + + _global_rknn = rknn + print(f"[INFO] RKNN 模型加载成功: {model_path}") + return rknn + +# --------------------------- +# 预处理 +# --------------------------- +def letterbox(image, new_size=640, color=(114,114,114)): + h, w = image.shape[:2] + scale = min(new_size/h, new_size/w) + nh, nw = int(h*scale), int(w*scale) + resized = cv2.resize(image, (nw, nh)) + new_img = np.full((new_size, new_size,3), color, dtype=np.uint8) + top = (new_size-nh)//2 + left = (new_size-nw)//2 + new_img[top:top+nh, left:left+nw] = resized + return new_img + +def resize_stretch(image, size=640): + return cv2.resize(image, (size, size)) + +def preprocess_image_for_rknn(img, size=640, resize_mode="stretch", to_rgb=False, normalize=False, layout="NHWC"): + if resize_mode=="letterbox": + img_box = letterbox(img, new_size=size) + else: + img_box = resize_stretch(img, size=size) + if to_rgb: + img_box = cv2.cvtColor(img_box, cv2.COLOR_BGR2RGB) + img_f = img_box.astype(np.float32) + if normalize: + img_f /= 255.0 + if layout=="NHWC": + out = np.expand_dims(img_f, axis=0) + else: + out = np.expand_dims(np.transpose(img_f,(2,0,1)), axis=0) + return out.astype(np.float32) + +# --------------------------- +# 单次 RKNN 推理 +# --------------------------- +def rknn_classify_preprocessed(input_tensor, model_path): + rknn = init_rknn_model(model_path) + input_tensor = np.ascontiguousarray(input_tensor.astype(np.float32)) + outs = rknn.inference([input_tensor]) + pred = outs[0].reshape(-1).astype(float) + class_id = int(np.argmax(pred)) + return class_id, pred + +# --------------------------- +# ROI +# --------------------------- +def load_single_roi(txt_path): + if not os.path.exists(txt_path): + raise RuntimeError(f"ROI 文件不存在: {txt_path}") + with open(txt_path) as f: + for line in f: + s = line.strip() + if not s: continue + 
x,y,w,h = map(int, s.split(',')) + return (x,y,w,h) + raise RuntimeError("ROI 文件为空") + +def crop_and_return_roi(img, roi): + x,y,w,h = roi + h_img, w_img = img.shape[:2] + if x<0 or y<0 or x+w>w_img or y+h>h_img: + raise RuntimeError(f"ROI 超出图像范围: {roi}") + return img[y:y+h, x:x+w] + +# --------------------------- +# class1/class2 加权 +# --------------------------- +def weighted_small_large(pred, threshold=0.4, w1=0.3, w2=0.7): + p1,p2 = float(pred[1]), float(pred[2]) + total = p1+p2 + score = (w1*p1 + w2*p2)/total if total>0 else 0.0 + final_class = "大堆料" if score>=threshold else "小堆料" + return final_class, score, p1, p2 + +# --------------------------- +# 单张图片推理 +# --------------------------- +def classify_single_image(model_path, frame, roi_file, + threshold=0.4, + size=640, resize_mode="stretch", + to_rgb=True, normalize=False, layout="NHWC"): + """ + 对单张图像进行分类推理(输入为 OpenCV 图像 ndarray) + + Args: + model_path (str): RKNN 模型路径 + frame (np.ndarray): BGR 格式的 OpenCV 图像 (H, W, 3) + roi_file (str): ROI 坐标文件路径(格式:x,y,w,h) + ... 其他参数同上 ... 
+ + Returns: + dict: 分类结果 + """ + if frame is None or frame.size == 0: + raise ValueError("❌ 输入图像为空或无效") + + roi = load_single_roi(roi_file) + roi_img = crop_and_return_roi(frame, roi) + input_tensor = preprocess_image_for_rknn(roi_img, size=size, resize_mode=resize_mode, + to_rgb=to_rgb, normalize=normalize, layout=layout) + + class_id, pred = rknn_classify_preprocessed(input_tensor, model_path) + class_name = CLASS_NAMES.get(class_id, f"未知类别({class_id})") + if class_id in [1, 2]: + final_class, score, p1, p2 = weighted_small_large(pred, threshold) + else: + final_class = class_name + score = float(pred[class_id]) + p1, p2 = float(pred[1]), float(pred[2]) + + return {"class": final_class, "score": round(score, 4), "p1": round(p1, 4), "p2": round(p2, 4), + "raw": pred.tolist()} +# --------------------------- +# 示例调用 +# --------------------------- +if __name__=="__main__": + model_path = "yiliao_cls61.rknn" + roi_file = "./roi_coordinates/61_rois.txt" + image_path = "./test_image/2.png" + + result = classify_single_image(model_path, image_path, roi_file) + print("[RESULT]", result) +