first commit

This commit is contained in:
琉璃月光
2025-12-16 15:12:02 +08:00
commit 8506c3af79
227 changed files with 4060 additions and 0 deletions

162
rknn-multi-threaded-nosigmoid/.gitignore vendored Normal file
View File

@ -0,0 +1,162 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
*.mp4

View File

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -0,0 +1,36 @@
# 简介
* 使用多线程异步操作rknn模型, 提高rk3588/rk3588s的NPU使用率, 进而提高推理帧数(rk3568之类修改后应该也能使用, 但是作者本人并没有rk3568开发板......)
* 此分支使用模型[yolov5s_relu_tk2_RK3588_i8.rknn](https://github.com/airockchip/rknn_model_zoo), 将yolov5s模型的激活函数silu修改为relu,在损失一点精度的情况下获得较大性能提升,详情见于[rknn_model_zoo](https://github.com/airockchip/rknn_model_zoo/tree/main/models/CV/object_detection/yolo)
* 此项目的[c++](https://github.com/leafqycc/rknn-cpp-Multithreading)实现
# 更新说明
*
# 使用说明
### 演示
* 将仓库拉取至本地, 并将Releases中的演示视频放于项目根目录下, 运行main.py查看演示示例
* 切换至root用户运行performance.sh可以进行定频操作(约等于开启性能模式)
* 运行rkcat.sh可以查看当前温度与NPU占用
### 部署应用
* 修改main.py下的modelPath为你自己的模型所在路径
* 修改main.py下的cap为你想要运行的视频/摄像头
* 修改main.py下的TPEs为你想要的线程数, 具体可参考下表
* 修改func.py为你自己需要的推理函数, 具体可查看myFunc函数
# 多线程模型帧率测试
* 使用performance.sh进行CPU/NPU定频尽量减少误差
* 测试模型为[yolov5s_relu_tk2_RK3588_i8.rknn](https://github.com/airockchip/rknn_model_zoo)
* 测试视频见于Releases
| 模型\线程数 | 1 | 2 | 3 | 4 | 5 | 6 |
| ---- | ---- | ---- | ---- | ---- | ---- | ---- |
| yolov5s | 27.4491 | 49.0747 | 65.3673 | 63.3204 | 71.8407 | 72.0590 |
# 补充
* 多线程下CPU, NPU占用较高, **核心温度相应增高**, 请做好散热。推荐开1, 2, 3线程, 实测小铜片散热下运行三分钟温度约为56°, 64°, 69°
# Acknowledgements
* https://github.com/ultralytics/yolov5
* https://github.com/rockchip-linux/rknn-toolkit2
* https://github.com/airockchip/rknn_model_zoo

View File

@ -0,0 +1,246 @@
# Minimally-adapted version of the original yolov5 example postprocess.
import cv2
import numpy as np
# Detection thresholds and model input size:
#   OBJ_THRESH - minimum objectness / class score to keep a candidate
#   NMS_THRESH - IoU threshold for non-maximum suppression
#   IMG_SIZE   - square model input resolution in pixels
OBJ_THRESH, NMS_THRESH, IMG_SIZE = 0.25, 0.45, 640
# COCO class names (80 entries), indexed by class id.
# NOTE(review): many entries carry stray trailing spaces or tabs (e.g.
# "motorbike ", "laptop\t") which are rendered verbatim by draw(); confirm
# whether they should be stripped.
CLASSES = ("person", "bicycle", "car", "motorbike ", "aeroplane ", "bus ", "train", "truck ", "boat", "traffic light",
           "fire hydrant", "stop sign ", "parking meter", "bench", "bird", "cat", "dog ", "horse ", "sheep", "cow", "elephant",
           "bear", "zebra ", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
           "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife ",
           "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza ", "donut", "cake", "chair", "sofa",
           "pottedplant", "bed", "diningtable", "toilet ", "tvmonitor", "laptop\t", "mouse\t", "remote ", "keyboard ", "cell phone", "microwave ",
           "oven ", "toaster", "sink", "refrigerator ", "book", "clock", "vase", "scissors ", "teddy bear ", "hair drier", "toothbrush ")
# ---------- 保留你原来的辅助函数process/filter/nms/xywh2xyxy ----------
def xywh2xyxy(x):
    """Convert boxes from (cx, cy, w, h) to corner form (x1, y1, x2, y2)."""
    out = np.copy(x)
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    out[:, 0] = x[:, 0] - half_w
    out[:, 1] = x[:, 1] - half_h
    out[:, 2] = x[:, 0] + half_w
    out[:, 3] = x[:, 1] + half_h
    return out
def process(input, mask, anchors):
    """
    Decode one detection-head scale into candidate boxes.

    input:   (grid_h, grid_w, 3, attrs) with attrs >= 5 + num_classes,
             laid out as [x, y, w, h, obj, cls...] per anchor slot.
    mask:    indices selecting this scale's 3 anchors from *anchors*.
    anchors: list of (w, h) anchor pairs.

    Returns (boxes_xywh, box_confidence, box_class_probs); boxes are in
    input-image pixels as (center x, center y, width, height).
    """
    # anchors: list of (w,h) pairs for the 3 anchors of this scale
    anchors = [anchors[i] for i in mask]
    grid_h, grid_w = map(int, input.shape[0:2])
    box_confidence = input[..., 4]
    box_confidence = np.expand_dims(box_confidence, axis=-1)
    box_class_probs = input[..., 5:]
    # YOLOv5-style xy decode: value*2 - 0.5 spans (-0.5, 1.5) around the cell.
    box_xy = input[..., :2] * 2 - 0.5
    # Build the per-cell offset grid.
    # NOTE(review): the tile counts use grid_w/grid_h crosswise
    # (tile(arange(grid_w), grid_w) and tile(..., grid_h)), which is only
    # correct for square grids. Fine here since the 640x640 input yields
    # square grids — confirm before using rectangular inputs.
    col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)
    row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)
    col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    grid = np.concatenate((col, row), axis=-1)
    box_xy += grid
    # Scale cell units to pixels (stride = IMG_SIZE / grid size).
    box_xy *= int(IMG_SIZE / grid_h)
    # wh decode: (value*2)^2, then scaled by the selected anchors
    # (unit 1x1 anchors neutralize this step).
    box_wh = pow(input[..., 2:4] * 2, 2)
    box_wh = box_wh * anchors
    return np.concatenate((box_xy, box_wh), axis=-1), box_confidence, box_class_probs
def filter_boxes(boxes, box_confidences, box_class_probs):
    """
    Keep candidates whose objectness and best class score both reach
    OBJ_THRESH.

    Returns (boxes, class_ids, scores) where score = objectness * class prob.
    """
    flat_boxes = boxes.reshape(-1, 4)
    flat_conf = box_confidences.reshape(-1)
    flat_probs = box_class_probs.reshape(-1, box_class_probs.shape[-1])

    # First pass: objectness threshold.
    conf_mask = np.where(flat_conf >= OBJ_THRESH)
    flat_boxes = flat_boxes[conf_mask]
    flat_conf = flat_conf[conf_mask]
    flat_probs = flat_probs[conf_mask]

    # Second pass: best-class score threshold.
    best_score = np.max(flat_probs, axis=-1)
    best_class = np.argmax(flat_probs, axis=-1)
    score_mask = np.where(best_score >= OBJ_THRESH)

    return (flat_boxes[score_mask],
            best_class[score_mask],
            (best_score * flat_conf)[score_mask])
def nms_boxes(boxes, scores):
    """
    Greedy IoU-based non-maximum suppression.

    boxes are corner-form (x1, y1, x2, y2); returns the indices of kept
    boxes, highest score first.
    """
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    # Corner form, so these differences are widths/heights.
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    areas = widths * heights

    order = scores.argsort()[::-1]
    keep = []
    eps = 1e-7
    while order.size > 0:
        best = order[0]
        keep.append(best)
        rest = order[1:]
        # Intersection rectangle of the best box with all remaining boxes.
        ix1 = np.maximum(x1[best], x1[rest])
        iy1 = np.maximum(y1[best], y1[rest])
        ix2 = np.minimum(x1[best] + widths[best], x1[rest] + widths[rest])
        iy2 = np.minimum(y1[best] + heights[best], y1[rest] + heights[rest])
        iw = np.maximum(0.0, ix2 - ix1 + 1e-5)
        ih = np.maximum(0.0, iy2 - iy1 + 1e-5)
        inter = iw * ih
        # eps guards against division by a degenerate zero-area union.
        union = np.maximum(areas[best] + areas[rest] - inter, eps)
        iou = inter / union
        # Drop every remaining box overlapping the best one too much.
        order = rest[np.where(iou <= NMS_THRESH)[0]]
    return np.array(keep)
# ---------- 关键:把你的 9 输出拼成原来 yolov5_post_process 需要的 input_data 格式 ----------
def yolov11_to_yolov5_style_input(outputs):
    """
    Adapt the 9-tensor detection head output to the 3-scale layout expected
    by the original yolov5 post-process.

    outputs: [reg80, cls80, obj80, reg40, cls40, obj40, reg20, cls20, obj20],
    each shaped (1, C, H, W). For every scale we build an (H, W, 3, 5+nc)
    array of [x, y, w, h, obj, cls...] per cell and repeat the same slice for
    all 3 "anchors" (unit anchors are used downstream, so the repetition does
    not rescale boxes).
    """
    adapted = []
    for reg_t, cls_t, obj_t in zip(outputs[0::3], outputs[1::3], outputs[2::3]):
        reg = reg_t[0]          # (64, H, W)
        cls_map = cls_t[0]      # (num_classes, H, W)
        obj_map = obj_t[0]      # (1, H, W)
        # Treat the first 4 regression channels as x, y, w, h per cell.
        xywh = np.transpose(reg[:4], (1, 2, 0))            # (H, W, 4)
        obj_hw = obj_map[0]                                 # (H, W)
        cls_hw = np.transpose(cls_map, (1, 2, 0))           # (H, W, nc)
        per_cell = np.concatenate(
            [xywh, obj_hw[..., None], cls_hw], axis=-1)     # (H, W, 5+nc)
        # Duplicate across a fake anchor axis -> (H, W, 3, 5+nc).
        adapted.append(np.repeat(per_cell[:, :, None, :], 3, axis=2))
    return adapted
def yolov5_post_process_adapted(input_data):
    """
    Reuse the original yolov5 post-process pipeline, but with unit (1x1)
    anchors so box_wh is not rescaled incorrectly for the adapted head.

    input_data: list of 3 arrays shaped (H, W, 3, 5+num_classes).
    Returns (boxes_xyxy, class_ids, scores), or (None, None, None) when no
    detection survives filtering/NMS.
    """
    # NOTE(review): masks is unused below; kept only for parity with the
    # original yolov5 code.
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]  # unused in anchors here, but kept for compatibility
    # Neutral (1,1) anchors so process() does not rescale widths/heights.
    anchors = [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1],
               [1, 1], [1, 1], [1, 1], [1, 1]]
    boxes, classes, scores = [], [], []
    # input_data is already a list of 3 arrays shaped (H, W, 3, 85)
    for input in input_data:
        # process() expects shape (grid_h, grid_w, 3, attrs)
        b, c, s = process(input, [0, 1, 2], anchors)  # mask and anchors values used inside process
        b, c, s = filter_boxes(b, c, s)
        boxes.append(b)
        classes.append(c)
        scores.append(s)
    # NOTE(review): this guard can never trigger — the loop always appends 3
    # (possibly empty) arrays. The effective empty-result guard is the
    # nclasses/nscores check after NMS below.
    if len(boxes) == 0:
        return None, None, None
    boxes = np.concatenate(boxes)
    boxes = xywh2xyxy(boxes)
    classes = np.concatenate(classes)
    scores = np.concatenate(scores)
    # Class-wise NMS: suppress overlaps only within the same class.
    nboxes, nclasses, nscores = [], [], []
    for cls_id in set(classes):
        inds = np.where(classes == cls_id)
        b = boxes[inds]
        c = classes[inds]
        s = scores[inds]
        keep = nms_boxes(b, s)
        nboxes.append(b[keep])
        nclasses.append(c[keep])
        nscores.append(s[keep])
    if not nclasses and not nscores:
        return None, None, None
    return np.concatenate(nboxes), np.concatenate(nclasses), np.concatenate(nscores)
# ---------- draw 保持原样 ----------
def draw(image, boxes, scores, classes):
    """
    Draw detection rectangles and '<class> <score>' labels onto *image*
    in place.

    boxes are corner-form (x1, y1, x2, y2) as produced by xywh2xyxy.
    NOTE(review): the locals are named top/left but actually hold x1/y1;
    cv2 points are (x, y), so the drawing is correct — only the names are
    misleading.
    """
    for box, score, cl in zip(boxes, scores, classes):
        top, left, right, bottom = box  # really x1, y1, x2, y2
        top = int(top)
        left = int(left)
        cv2.rectangle(image, (top, left), (int(right), int(bottom)), (255, 0, 0), 2)
        # Label just above the top-left corner of the box.
        cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
                    (top, left - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 2)
# ---------- 最终 myFunc替换你原来的 myFunc ----------
def myFunc(rknn_lite, IMG):
    """
    Per-frame inference callback: preprocess the BGR frame, run the RKNN
    model, adapt its 9-tensor output to yolov5-style data, post-process,
    keep only 'car' detections, and return a BGR frame with boxes drawn.

    NOTE(review): the resize below stretches the frame to 640x640, so the
    returned visualization is also 640x640, not the original resolution.
    """
    # 1. BGR -> RGB
    IMG = cv2.cvtColor(IMG, cv2.COLOR_BGR2RGB)
    # 2. Resize (stretch) to model input size (640x640)
    IMG = cv2.resize(IMG, (IMG_SIZE, IMG_SIZE))  # (640, 640, 3)
    # 3. HWC -> CHW
    IMG = np.transpose(IMG, (2, 0, 1))  # (3, 640, 640)
    # 4. Add batch dimension: (1, 3, 640, 640), uint8 as the model expects
    IMG_in = np.expand_dims(IMG, axis=0).astype(np.uint8)
    # 5. Inference
    outputs = rknn_lite.inference(inputs=[IMG_in])
    if outputs is None:
        # Best-effort: return the (unannotated) frame rather than crash.
        print("⚠️ Inference failed, skipping frame.")
        return cv2.cvtColor(IMG_in.squeeze().transpose(1, 2, 0), cv2.COLOR_RGB2BGR)
    # 6. Convert the 9-output head into yolov5-style input_data
    input_data = yolov11_to_yolov5_style_input(outputs)
    # 7. Run the adapted yolov5 post-process
    boxes, classes, scores = yolov5_post_process_adapted(input_data)
    # 8. Keep only 'car' detections (COCO id = 2); drop everything else.
    if boxes is not None:
        keep_car = np.where(classes == 2)[0]
        if keep_car.size == 0:
            boxes, classes, scores = None, None, None
        else:
            boxes = boxes[keep_car]
            classes = classes[keep_car]
            scores = scores[keep_car]
    # Convert back to BGR for OpenCV display
    IMG_vis = cv2.cvtColor(IMG_in.squeeze().transpose(1, 2, 0), cv2.COLOR_RGB2BGR)
    if boxes is not None:
        draw(IMG_vis, boxes, scores, classes)
    return IMG_vis

View File

@ -0,0 +1,70 @@
import cv2
import numpy as np
# ------------------ Classification labels (defined manually) ------------------
labels = ["noready", "ready"]  # 0: cover plate not seated, 1: cover plate seated
# ------------------ Configuration ------------------
# Model input size (width, height) expected by the classifier.
IMG_SIZE = (640, 640)
def preprocess_cls(bgr_image_corrected, target_size=IMG_SIZE):
    """
    Preprocess an already orientation-corrected (flipped) BGR image:
      - convert to RGB
      - resize to the model input size
      - add a batch dimension (uint8)

    Note: this function does NOT flip the image; flipping must be done by
    the caller beforehand.
    """
    as_rgb = cv2.cvtColor(bgr_image_corrected, cv2.COLOR_BGR2RGB)
    scaled = cv2.resize(as_rgb, target_size)
    batched = scaled[np.newaxis, ...].astype(np.uint8)
    return batched
def get_top1_class_str_from_output(outputs):
    """
    Map a raw classifier output to its top-1 label string.

    Returns "Error: No output" when outputs is None/empty and "Unknown"
    when the argmax index falls outside the label table.
    """
    if outputs is None or len(outputs) == 0:
        return "Error: No output"
    scores = outputs[0].flatten()
    best = int(np.argmax(scores))
    return labels[best] if 0 <= best < len(labels) else "Unknown"
def myFunc(rknn_lite, IMG):
    """
    Unified inference entry point (classification task).

    - Input: raw BGR frame (camera may be mounted upside-down)
    - Flips the frame vertically first to correct orientation
    - Runs the classifier on the corrected frame
    - Returns the corrected (visually upright) BGR frame annotated with the
      predicted class name
    """
    # 1. Flip vertically to undo the upside-down camera mounting.
    corrected_img = cv2.flip(IMG, 0)  # now visually upright
    # 2. Preprocess the corrected frame.
    input_data = preprocess_cls(corrected_img, target_size=IMG_SIZE)
    # 3. Inference
    outputs = rknn_lite.inference(inputs=[input_data])
    # 4. Parse the result into a label string.
    class_name = get_top1_class_str_from_output(outputs)
    # 5. Draw the result on a copy of the corrected frame.
    vis_img = corrected_img.copy()
    text = f"Class: {class_name}"
    cv2.putText(
        vis_img,
        text,
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        1.0,
        (0, 255, 0),
        2,
        cv2.LINE_AA
    )
    return vis_img  # vertically flipped BGR frame (visually "upright")

View File

@ -0,0 +1,442 @@
import cv2
import numpy as np
# ---------------------------
# Frozen configuration
# ---------------------------
# Raw class-id -> display-name mapping (names are just the digit strings).
CLASS_NAMES = {
    0: "0",
    1: "1",
    2: "2",
    3: "3",
    4: "4"
}
# FIXED_ROI is (x, y, w, h) in the coordinates of the *flipped* original
# image, annotated at 1920x1080.
FIXED_ROI = (445, 540, 931, 319)
# Original resolution the ROI annotation is based on.
ORIG_W = 1920
ORIG_H = 1080
# Current input image size (fixed to 640x640 upstream).
CUR_W = 640
CUR_H = 640
IMG_SIZE = 640
RESIZE_MODE = "stretch"  # "letterbox" is declared below but not implemented
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"
# Weighted small/large-pile decision: classes 1 and 2 are blended with
# weights W1/W2 and compared against WEIGHT_THRESHOLD.
WEIGHT_THRESHOLD = 0.4
W1, W2 = 0.3, 0.7
# ---------------------------
# 基本工具函数
# ---------------------------
def resize_stretch(image, size=IMG_SIZE):
    """Stretch-resize *image* to a square (size x size), ignoring aspect ratio."""
    target = (size, size)
    return cv2.resize(image, target)
def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """
    Convert a BGR crop into a float32 RKNN input tensor.

    size:        square target resolution
    resize_mode: only "stretch" is implemented; "letterbox" raises
                 NotImplementedError
    to_rgb:      convert BGR -> RGB when True
    normalize:   divide by 255.0 when True
    layout:      "NHWC" keeps channels last; anything else produces NCHW
    """
    if resize_mode == "letterbox":
        raise NotImplementedError
    resized = resize_stretch(img, size=size)
    if to_rgb:
        resized = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    tensor = resized.astype(np.float32)
    if normalize:
        tensor = tensor / 255.0
    if layout == "NHWC":
        batched = tensor[np.newaxis, ...]
    else:
        batched = np.transpose(tensor, (2, 0, 1))[np.newaxis, ...]
    return batched.astype(np.float32)
def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """
    Blend the class-1 and class-2 probabilities into a single score and
    label it.

    Returns (label, score, p1, p2); label is "大堆料" (large pile) when the
    weighted, normalized score reaches *threshold*, otherwise "小堆料"
    (small pile).
    """
    p1 = float(pred[1])
    p2 = float(pred[2])
    total = p1 + p2
    if total > 0:
        score = (w1 * p1 + w2 * p2) / total
    else:
        score = 0.0
    label = "大堆料" if score >= threshold else "小堆料"
    return label, score, p1, p2
# ---------------------------
# ROI 缩放函数(核心)
# ---------------------------
def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """
    Rescale an (x, y, w, h) ROI annotated at (orig_w, orig_h) to the actual
    resolution of *img*. Results are truncated to int.
    """
    roi_x, roi_y, roi_w, roi_h = orig_roi
    cur_h, cur_w = img.shape[:2]
    fx = cur_w / orig_w
    fy = cur_h / orig_h
    return int(roi_x * fx), int(roi_y * fy), int(roi_w * fx), int(roi_h * fy)
# ---------------------------
# 主推理接口
# ---------------------------
def myFunc(rknn_lite, IMG):
    """
    Main inference entry point: crop the configured ROI from the vertically
    flipped frame, classify it with the RKNN model, and return the flipped
    frame annotated with the ROI box and the final class label.

    Raises ValueError when IMG is None or empty.
    """
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")
    # 1) The input is already 640x640 but FIXED_ROI was annotated at
    #    1920x1080, so the ROI must be rescaled first.
    scaled_roi = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)
    # 2) Flip vertically (the ROI is defined on the flipped original).
    flipped_IMG = cv2.flip(IMG, 0)
    # 3) Crop the rescaled ROI from the flipped frame.
    x, y, w, h = scaled_roi
    roi_img = flipped_IMG[y:y+h, x:x+w]
    # 4) RKNN input preprocessing.
    input_tensor = preprocess_image_for_rknn(
        roi_img,
        size=IMG_SIZE,
        resize_mode=RESIZE_MODE,
        to_rgb=TO_RGB,
        normalize=NORMALIZE,
        layout=LAYOUT
    )
    input_tensor = np.ascontiguousarray(input_tensor.astype(np.float32))
    # 5) Inference.
    outputs = rknn_lite.inference([input_tensor])
    pred = outputs[0].reshape(-1).astype(float)
    class_id = int(np.argmax(pred))
    # Map the argmax id to a display name; ids 1/2 are re-decided by the
    # weighted small/large rule.
    raw_class_name = CLASS_NAMES.get(class_id, f"未知({class_id})")
    if class_id in [1, 2]:
        final_class, _, _, _ = weighted_small_large(pred)
    else:
        final_class = raw_class_name
    # 6) Visualization (drawn on a copy of flipped_IMG).
    vis_img = flipped_IMG.copy()
    cv2.rectangle(vis_img, (x, y), (x+w, y+h), (255, 0, 0), 2)
    # Put the label above the box, or below it when too close to the top.
    ty = y - 10 if y >= 20 else y + h + 20
    cv2.putText(
        vis_img, f"Class: {final_class}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8, (0, 255, 0), 2
    )
    return vis_img
# NOTE(review): everything from here to the end of this section is an exact
# duplicate of the definitions earlier in this file (imports, configuration,
# helpers and myFunc). At import time these simply re-bind the same names to
# identical objects, so behavior is unchanged, but the duplication looks like
# an accidental paste — confirm and remove.
import cv2
import numpy as np
# ---------------------------
# Frozen configuration
# ---------------------------
CLASS_NAMES = {
    0: "0",
    1: "1",
    2: "2",
    3: "3",
    4: "4"
}
# FIXED_ROI is (x, y, w, h) in the coordinates of the flipped original image
# (annotated at 1920x1080).
FIXED_ROI = (445, 540, 931, 319)
# Original resolution the ROI annotation is based on.
ORIG_W = 1920
ORIG_H = 1080
# Current input image size (fixed at 640x640 upstream).
CUR_W = 640
CUR_H = 640
IMG_SIZE = 640
RESIZE_MODE = "stretch"
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"
WEIGHT_THRESHOLD = 0.4
W1, W2 = 0.3, 0.7
# ---------------------------
# Basic utilities
# ---------------------------
def resize_stretch(image, size=IMG_SIZE):
    """Stretch-resize image to (size, size), ignoring aspect ratio."""
    return cv2.resize(image, (size, size))
def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """Convert a BGR crop into a float32 tensor for RKNN inference."""
    if resize_mode == "letterbox":
        raise NotImplementedError
    else:
        img_box = resize_stretch(img, size=size)
    if to_rgb:
        img_box = cv2.cvtColor(img_box, cv2.COLOR_BGR2RGB)
    img_f = img_box.astype(np.float32)
    if normalize:
        img_f /= 255.0
    if layout == "NHWC":
        out = np.expand_dims(img_f, axis=0)
    else:
        out = np.expand_dims(np.transpose(img_f, (2, 0, 1)), axis=0)
    return out.astype(np.float32)
def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """Blend class-1/class-2 probabilities; label large vs small pile."""
    p1, p2 = float(pred[1]), float(pred[2])
    total = p1 + p2
    score = (w1*p1 + w2*p2)/total if total > 0 else 0.0
    return ("大堆料" if score >= threshold else "小堆料"), score, p1, p2
# ---------------------------
# ROI rescaling (core)
# ---------------------------
def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """
    Rescale an ROI annotated at the original resolution to the current
    image's resolution.
    """
    x, y, w, h = orig_roi
    # current image size
    cur_h, cur_w = img.shape[:2]
    scale_x = cur_w / orig_w
    scale_y = cur_h / orig_h
    sx = int(x * scale_x)
    sy = int(y * scale_y)
    sw = int(w * scale_x)
    sh = int(h * scale_y)
    return sx, sy, sw, sh
# ---------------------------
# Main inference entry point
# ---------------------------
def myFunc(rknn_lite, IMG):
    """Crop ROI from the flipped frame, classify, return annotated frame."""
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")
    # Rescale the 1920x1080-annotated ROI to the current input.
    scaled_roi = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)
    # Flip vertically (ROI is defined on the flipped original).
    flipped_IMG = cv2.flip(IMG, 0)
    # Crop the rescaled ROI from the flipped frame.
    x, y, w, h = scaled_roi
    roi_img = flipped_IMG[y:y+h, x:x+w]
    # RKNN input preprocessing.
    input_tensor = preprocess_image_for_rknn(
        roi_img,
        size=IMG_SIZE,
        resize_mode=RESIZE_MODE,
        to_rgb=TO_RGB,
        normalize=NORMALIZE,
        layout=LAYOUT
    )
    input_tensor = np.ascontiguousarray(input_tensor.astype(np.float32))
    # Inference.
    outputs = rknn_lite.inference([input_tensor])
    pred = outputs[0].reshape(-1).astype(float)
    class_id = int(np.argmax(pred))
    # Class-name handling: ids 1/2 go through the weighted rule.
    raw_class_name = CLASS_NAMES.get(class_id, f"未知({class_id})")
    if class_id in [1, 2]:
        final_class, _, _, _ = weighted_small_large(pred)
    else:
        final_class = raw_class_name
    # Visualization (drawn on a copy of flipped_IMG).
    vis_img = flipped_IMG.copy()
    cv2.rectangle(vis_img, (x, y), (x+w, y+h), (255, 0, 0), 2)
    ty = y - 10 if y >= 20 else y + h + 20
    cv2.putText(
        vis_img, f"Class: {final_class}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8, (0, 255, 0), 2
    )
    return vis_img
# NOTE(review): this is the SECOND exact duplicate of the definitions at the
# top of this file (imports, configuration, helpers and myFunc). Re-binding
# the same names is harmless at runtime, but the triplication looks like an
# accidental paste — confirm and remove.
import cv2
import numpy as np
# ---------------------------
# Frozen configuration
# ---------------------------
CLASS_NAMES = {
    0: "0",
    1: "1",
    2: "2",
    3: "3",
    4: "4"
}
# FIXED_ROI is (x, y, w, h) in the coordinates of the flipped original image
# (annotated at 1920x1080).
FIXED_ROI = (445, 540, 931, 319)
# Original resolution the ROI annotation is based on.
ORIG_W = 1920
ORIG_H = 1080
# Current input image size (fixed at 640x640 upstream).
CUR_W = 640
CUR_H = 640
IMG_SIZE = 640
RESIZE_MODE = "stretch"
TO_RGB = True
NORMALIZE = False
LAYOUT = "NHWC"
WEIGHT_THRESHOLD = 0.4
W1, W2 = 0.3, 0.7
# ---------------------------
# Basic utilities
# ---------------------------
def resize_stretch(image, size=IMG_SIZE):
    """Stretch-resize image to (size, size), ignoring aspect ratio."""
    return cv2.resize(image, (size, size))
def preprocess_image_for_rknn(img, size, resize_mode, to_rgb, normalize, layout):
    """Convert a BGR crop into a float32 tensor for RKNN inference."""
    if resize_mode == "letterbox":
        raise NotImplementedError
    else:
        img_box = resize_stretch(img, size=size)
    if to_rgb:
        img_box = cv2.cvtColor(img_box, cv2.COLOR_BGR2RGB)
    img_f = img_box.astype(np.float32)
    if normalize:
        img_f /= 255.0
    if layout == "NHWC":
        out = np.expand_dims(img_f, axis=0)
    else:
        out = np.expand_dims(np.transpose(img_f, (2, 0, 1)), axis=0)
    return out.astype(np.float32)
def weighted_small_large(pred, threshold=WEIGHT_THRESHOLD, w1=W1, w2=W2):
    """Blend class-1/class-2 probabilities; label large vs small pile."""
    p1, p2 = float(pred[1]), float(pred[2])
    total = p1 + p2
    score = (w1*p1 + w2*p2)/total if total > 0 else 0.0
    return ("大堆料" if score >= threshold else "小堆料"), score, p1, p2
# ---------------------------
# ROI rescaling (core)
# ---------------------------
def scale_roi_to_current(img, orig_roi, orig_w, orig_h):
    """
    Rescale an ROI annotated at the original resolution to the current
    image's resolution.
    """
    x, y, w, h = orig_roi
    # current image size
    cur_h, cur_w = img.shape[:2]
    scale_x = cur_w / orig_w
    scale_y = cur_h / orig_h
    sx = int(x * scale_x)
    sy = int(y * scale_y)
    sw = int(w * scale_x)
    sh = int(h * scale_y)
    return sx, sy, sw, sh
# ---------------------------
# Main inference entry point
# ---------------------------
def myFunc(rknn_lite, IMG):
    """Crop ROI from the flipped frame, classify, return annotated frame."""
    if IMG is None or IMG.size == 0:
        raise ValueError("输入图像无效")
    # Rescale the 1920x1080-annotated ROI to the current input.
    scaled_roi = scale_roi_to_current(IMG, FIXED_ROI, ORIG_W, ORIG_H)
    # Flip vertically (ROI is defined on the flipped original).
    flipped_IMG = cv2.flip(IMG, 0)
    # Crop the rescaled ROI from the flipped frame.
    x, y, w, h = scaled_roi
    roi_img = flipped_IMG[y:y+h, x:x+w]
    # RKNN input preprocessing.
    input_tensor = preprocess_image_for_rknn(
        roi_img,
        size=IMG_SIZE,
        resize_mode=RESIZE_MODE,
        to_rgb=TO_RGB,
        normalize=NORMALIZE,
        layout=LAYOUT
    )
    input_tensor = np.ascontiguousarray(input_tensor.astype(np.float32))
    # Inference.
    outputs = rknn_lite.inference([input_tensor])
    pred = outputs[0].reshape(-1).astype(float)
    class_id = int(np.argmax(pred))
    # Class-name handling: ids 1/2 go through the weighted rule.
    raw_class_name = CLASS_NAMES.get(class_id, f"未知({class_id})")
    if class_id in [1, 2]:
        final_class, _, _, _ = weighted_small_large(pred)
    else:
        final_class = raw_class_name
    # Visualization (drawn on a copy of flipped_IMG).
    vis_img = flipped_IMG.copy()
    cv2.rectangle(vis_img, (x, y), (x+w, y+h), (255, 0, 0), 2)
    ty = y - 10 if y >= 20 else y + h + 20
    cv2.putText(
        vis_img, f"Class: {final_class}",
        (x, ty),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8, (0, 255, 0), 2
    )
    return vis_img

View File

@ -0,0 +1,198 @@
import cv2
import numpy as np
import math
# ---------- Configuration ----------
CLASSES = ['clamp']
nmsThresh = 0.4
objectThresh = 0.5
INPUT_DTYPE = np.uint8  # adjust to match the exported model's input dtype
# ---------- Optional drawing ----------
DRAW_BOX = True  # draw the rotated detection box
DRAW_SCORE = False  # draw the confidence score
# ---------------- Utilities ----------------
class DetectBox:
    """One rotated detection: class id, confidence, axis-aligned extent, angle."""
    def __init__(self, classId, score, xmin, ymin, xmax, ymax, angle):
        self.classId = classId  # integer class index
        self.score = score      # confidence score
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.angle = angle      # rotation in radians
def letterbox_resize(image, size, bg_color=114):
    """Resize *image* into a (tw, th) canvas, preserving aspect ratio.

    Returns (canvas, scale, dx, dy): the padded image, the applied scale
    factor, and the top-left offset where the resized content was pasted.
    """
    tw, th = size
    src_h, src_w = image.shape[:2]
    scale = min(tw / src_w, th / src_h)
    new_w = int(src_w * scale)
    new_h = int(src_h * scale)
    shrunk = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)
    canvas = np.full((th, tw, 3), bg_color, dtype=np.uint8)
    dx = (tw - new_w) // 2
    dy = (th - new_h) // 2
    canvas[dy:dy + new_h, dx:dx + new_w] = shrunk
    return canvas, scale, dx, dy
def rotate_rectangle(x1, y1, x2, y2, a):
    """Rotate the rect (x1,y1)-(x2,y2) by *a* radians about its center.

    Returns the four integer corner points in order TL, BL, BR, TR.
    """
    cx = (x1 + x2) / 2
    cy = (y1 + y2) / 2
    cos_a = math.cos(a)
    sin_a = math.sin(a)
    corners = ((x1, y1), (x1, y2), (x2, y2), (x2, y1))
    rotated = []
    for px, py in corners:
        dx = px - cx
        dy = py - cy
        rotated.append([int(cx + dx * cos_a - dy * sin_a),
                        int(cy + dx * sin_a + dy * cos_a)])
    return rotated
def polygon_area(pts):
    """Area of a simple polygon given as [[x, y], ...] (shoelace formula)."""
    n = len(pts)
    acc = 0.0
    for i in range(n):
        xa, ya = pts[i]
        xb, yb = pts[(i + 1) % n]
        acc += xa * yb - xb * ya
    return 0.5 * abs(acc)
def polygon_intersection_area(p1, p2):
    """Intersection area of two polygons, via shapely.

    NOTE(review): the broad except also swallows ImportError — if shapely is
    not installed this silently returns 0.0, which makes IoU always 0 and NMS
    keep every box.  Confirm shapely is present in the deployment image.
    """
    try:
        from shapely.geometry import Polygon
        poly1, poly2 = Polygon(p1), Polygon(p2)
        # buffer(0) repairs self-intersecting/invalid rings
        if not poly1.is_valid:
            poly1 = poly1.buffer(0)
        if not poly2.is_valid:
            poly2 = poly2.buffer(0)
        inter = poly1.intersection(poly2)
        return inter.area if not inter.is_empty else 0.0
    except Exception:
        return 0.0
def IoU(box1, box2):
    """Rotated-box IoU computed on the rotated corner polygons."""
    poly_a = rotate_rectangle(box1.xmin, box1.ymin, box1.xmax, box1.ymax, box1.angle)
    poly_b = rotate_rectangle(box2.xmin, box2.ymin, box2.xmax, box2.ymax, box2.angle)
    inter = polygon_intersection_area(poly_a, poly_b)
    union = polygon_area(poly_a) + polygon_area(poly_b) - inter
    if union <= 1e-6:
        return 0.0
    return inter / union
def NMS(detectResult):
    """Greedy per-class NMS on rotated boxes.

    Boxes are visited in descending score order; a box is suppressed when its
    IoU with an already-kept box of the same class exceeds nmsThresh.

    BUGFIX: the previous version flagged suppressed boxes by overwriting
    classId with -1, mutating the caller's DetectBox objects; suppression is
    now tracked in a local index set, leaving the inputs untouched.
    """
    if not detectResult:
        return []
    boxes = sorted(detectResult, key=lambda b: b.score, reverse=True)
    suppressed = set()
    keep = []
    for i, b1 in enumerate(boxes):
        if i in suppressed:
            continue
        keep.append(b1)
        for j in range(i + 1, len(boxes)):
            if j in suppressed:
                continue
            b2 = boxes[j]
            if b2.classId == b1.classId and IoU(b1, b2) > nmsThresh:
                suppressed.add(j)
    return keep
from scipy.special import expit


def sigmoid(x):
    """Numerically stable logistic function (scipy's expit)."""
    return expit(x)


def softmax(x, axis=-1):
    """Softmax along *axis*, shifted by the max for numerical stability."""
    arr = np.asarray(x)
    shifted = arr - np.max(arr, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / (np.sum(exps, axis=axis, keepdims=True) + 1e-9)


# 0..15 index grid used as expectation weights in process().
ARANGE16 = np.arange(16).reshape(1, 1, 16, 1)
def process(out, model_w, model_h, stride, angle_feature, index, scale_w=1, scale_h=1):
    """Decode one RKNN head output into DetectBox candidates.

    Args:
        out: head tensor reshaped to (1, 65, H*W); channels 0..63 are the
            4x16 distribution-style box regression, channel 64 the class logit.
        model_w, model_h: feature-map width/height of this head.
        stride: pixel stride of this head (8/16/32).
        angle_feature: per-cell angle output; flattened and indexed by *index*
            plus the cell's position (assumes values roughly in [0, 1) —
            TODO confirm against the exported model).
        index: offset of this head's cells inside angle_feature.
        scale_w, scale_h: multiplier applied to the final box coordinates.

    Returns a list of DetectBox in (scaled) input-image coordinates.
    """
    angle_feature = angle_feature.reshape(-1)
    xywh = out[:, :64, :]  # box-regression channels, [1, 64, H*W]
    conf = sigmoid(out[:, 64:, :]).reshape(-1)  # per-cell class confidence, [H*W*num_classes]
    boxes = []
    class_num = len(CLASSES)
    total = model_h * model_w * class_num
    for ik in range(total):
        if conf[ik] <= objectThresh:
            continue
        # Recover (w, h, class) cell coordinates from the flat index.
        w = ik % model_w
        h = (ik // model_w) % model_h
        c = ik // (model_w * model_h)
        # Expectation over the 16 softmaxed bins for each of the 4 box sides.
        xywh_ = xywh[0, :, h * model_w + w].reshape(1, 4, 16, 1)
        xywh_ = softmax(xywh_, axis=2)
        xywh_ = np.sum(xywh_ * ARANGE16, axis=2).reshape(-1)  # [4]
        xy_add = xywh_[:2] + xywh_[2:]        # box width/height (cell units)
        xy_sub = (xywh_[2:] - xywh_[:2]) / 2  # center offset (cell units)
        # (value - 0.25) * pi maps the raw angle output to radians.
        angle = (angle_feature[index + h * model_w + w] - 0.25) * math.pi
        cos_a, sin_a = math.cos(angle), math.sin(angle)
        xy_rot = np.array([
            xy_sub[0] * cos_a - xy_sub[1] * sin_a,
            xy_sub[0] * sin_a + xy_sub[1] * cos_a
        ])
        cx = (xy_rot[0] + w + 0.5) * stride
        cy = (xy_rot[1] + h + 0.5) * stride
        w_box = xy_add[0] * stride
        h_box = xy_add[1] * stride
        xmin = (cx - w_box / 2) * scale_w
        ymin = (cy - h_box / 2) * scale_h
        xmax = (cx + w_box / 2) * scale_w
        ymax = (cy + h_box / 2) * scale_h
        boxes.append(DetectBox(c, float(conf[ik]), float(xmin), float(ymin), float(xmax), float(ymax), float(angle)))
    return boxes
# ---------------- RKNN inference entry ----------------
def myFunc(rknn, frame, lock=None):
    """Run rotated-box detection on *frame*; returns an annotated copy.

    On any failure the original frame is returned unchanged, so the caller
    always receives an np.ndarray.
    """
    try:
        img_resized, scale, offset_x, offset_y = letterbox_resize(frame, (640, 640))
        infer_img = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
        infer_input = np.expand_dims(infer_img.astype(INPUT_DTYPE), 0)
        results = rknn.inference([infer_input])
        if not results or len(results) < 1:
            return frame
        outputs = []
        for x in results[:-1]:
            if x is None:
                continue
            # Pick stride and angle-feature offset from the head's spatial size.
            stride, index = 0, 0
            if x.shape[2] == 20:
                stride, index = 32, 20 * 4 * 20 * 4 + 20 * 2 * 20 * 2
            elif x.shape[2] == 40:
                stride, index = 16, 20 * 4 * 20 * 4
            elif x.shape[2] == 80:
                stride, index = 8, 0
            else:
                continue
            feature = x.reshape(1, 65, -1)
            # BUGFIX: keep decoded boxes in the 640x640 letterbox space
            # (scale_w = scale_h = 1.0).  The old code passed 1.0/scale here
            # AND divided by scale again below, rescaling the boxes twice and
            # subtracting the letterbox offsets in the wrong coordinate space.
            outputs += process(feature, x.shape[3], x.shape[2], stride, results[-1], index, 1.0, 1.0)
        if not outputs:
            return frame
        predbox = NMS(outputs)
        if len(predbox) < 2:
            return frame
        box1, box2 = sorted(predbox, key=lambda b: b.score, reverse=True)[:2]
        out_frame = frame.copy()
        for box in [box1, box2]:
            # Undo the letterbox transform: remove padding offset, then unscale.
            xmin = int((box.xmin - offset_x) / scale)
            ymin = int((box.ymin - offset_y) / scale)
            xmax = int((box.xmax - offset_x) / scale)
            ymax = int((box.ymax - offset_y) / scale)
            if DRAW_BOX:
                pts = rotate_rectangle(xmin, ymin, xmax, ymax, box.angle)
                cv2.polylines(out_frame, [np.array(pts, np.int32)], True, (0, 255, 0), 2)
            if DRAW_SCORE:
                cv2.putText(
                    out_frame,
                    f"{box.score:.2f}",
                    (xmin, max(10, ymin - 6)),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6,
                    (0, 255, 0),
                    2,
                )
        return out_frame
    except Exception as e:
        print(f"[func ❌] 推理异常: {e}")
        return frame  # on error return the original frame, always an np.ndarray

View File

@ -0,0 +1,214 @@
import cv2
import numpy as np
import math
# ---------- Configuration ----------
CLASSES = ['clamp']          # single detection class
nmsThresh = 0.4
objectThresh = 0.5
INPUT_DTYPE = np.uint8       # adjust to match the exported model's input dtype
DRAW_BOX = True              # draw the rotated detection box
DRAW_SCORE = False           # draw the confidence score
# ---------------- Utilities ----------------
class DetectBox:
    """A single rotated detection result."""
    def __init__(self, classId, score, xmin, ymin, xmax, ymax, angle):
        self.classId = classId  # integer class index
        self.score = score      # confidence score
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.angle = angle      # rotation in radians
def letterbox_resize(image, size, bg_color=114):
    """Aspect-preserving resize of *image* onto a (tw, th) padded canvas.

    Returns (canvas, scale, dx, dy) where (dx, dy) is the paste offset.
    """
    tw, th = size
    in_h, in_w = image.shape[:2]
    ratio = min(tw / in_w, th / in_h)
    out_w, out_h = int(in_w * ratio), int(in_h * ratio)
    resized = cv2.resize(image, (out_w, out_h), interpolation=cv2.INTER_AREA)
    board = np.full((th, tw, 3), bg_color, dtype=np.uint8)
    off_x, off_y = (tw - out_w) // 2, (th - out_h) // 2
    board[off_y:off_y + out_h, off_x:off_x + out_w] = resized
    return board, ratio, off_x, off_y
def rotate_rectangle(x1, y1, x2, y2, a):
    """Corners of the rect (x1,y1)-(x2,y2) rotated by *a* radians about its center."""
    cx, cy = (x1 + x2) / 2, (y1 + y2) / 2
    ca, sa = math.cos(a), math.sin(a)
    def _rot(px, py):
        dx, dy = px - cx, py - cy
        return [int(cx + dx * ca - dy * sa), int(cy + dx * sa + dy * ca)]
    return [_rot(x1, y1), _rot(x1, y2), _rot(x2, y2), _rot(x2, y1)]
def polygon_area(pts):
    """Shoelace area of a simple polygon [[x, y], ...].

    BUGFIX/generalization: the previous version hard-coded `% 4` and
    `range(4)`, so it only worked for quadrilaterals (and raised IndexError
    on triangles); this uses len(pts), matching the sibling implementation.
    Behavior for quads is unchanged.
    """
    n = len(pts)
    x, y = zip(*pts)
    return 0.5 * abs(sum(x[i] * y[(i + 1) % n] - x[(i + 1) % n] * y[i] for i in range(n)))
def polygon_intersection_area(p1, p2):
    """Intersection area of two polygons via shapely.

    NOTE(review): the bare except also swallows ImportError — without shapely
    installed this silently returns 0, making IoU always 0 and NMS a no-op.
    """
    try:
        from shapely.geometry import Polygon
        poly1, poly2 = Polygon(p1), Polygon(p2)
        # buffer(0) repairs invalid (self-intersecting) rings
        if not poly1.is_valid: poly1 = poly1.buffer(0)
        if not poly2.is_valid: poly2 = poly2.buffer(0)
        inter = poly1.intersection(poly2)
        return inter.area if not inter.is_empty else 0
    except:
        return 0
def IoU(b1, b2):
    """IoU of two rotated boxes, computed on their corner polygons."""
    corners1 = rotate_rectangle(b1.xmin, b1.ymin, b1.xmax, b1.ymax, b1.angle)
    corners2 = rotate_rectangle(b2.xmin, b2.ymin, b2.xmax, b2.ymax, b2.angle)
    overlap = polygon_intersection_area(corners1, corners2)
    total = polygon_area(corners1) + polygon_area(corners2) - overlap
    return overlap / total if total > 1e-6 else 0
def NMS(boxes):
    """Greedy per-class NMS on rotated boxes (descending score order).

    BUGFIX: the previous version marked suppressed boxes by setting
    classId = -1, mutating the caller's DetectBox objects; suppression is now
    tracked in a local index set so the inputs stay intact.
    """
    if not boxes:
        return []
    ordered = sorted(boxes, key=lambda b: b.score, reverse=True)
    dropped = set()
    keep = []
    for i, b1 in enumerate(ordered):
        if i in dropped:
            continue
        keep.append(b1)
        for j in range(i + 1, len(ordered)):
            if j in dropped:
                continue
            b2 = ordered[j]
            if b2.classId == b1.classId and IoU(b1, b2) > nmsThresh:
                dropped.add(j)
    return keep
from scipy.special import expit


def sigmoid(x):
    """Logistic function, delegated to scipy's numerically stable expit."""
    return expit(x)


def softmax(x, axis=-1):
    """Max-shifted softmax along *axis* (small epsilon guards the division)."""
    data = np.asarray(x)
    exps = np.exp(data - np.max(data, axis=axis, keepdims=True))
    return exps / (exps.sum(axis=axis, keepdims=True) + 1e-9)


# Index weights 0..15 used for the bin-expectation decode in process().
ARANGE16 = np.arange(16).reshape(1, 1, 16, 1)
def process(out, mw, mh, stride, angle_feature, index, scale_w=1, scale_h=1):
    """Decode one head output (1, 65, H*W) into DetectBox candidates.

    Channels 0..63 are the 4x16 distribution-style box regression, channel 64
    the class logit.  *index* offsets this head's cells inside the flattened
    angle output; *scale_w*/*scale_h* multiply the final coordinates.
    """
    angle_feature = angle_feature.reshape(-1)
    xywh = out[:, :64, :]                      # box-regression channels
    conf = sigmoid(out[:, 64:, :]).reshape(-1) # per-cell class confidence
    boxes = []
    class_num = len(CLASSES)
    total = mh * mw * class_num
    for ik in range(total):
        if conf[ik] <= objectThresh: continue
        # Recover (w, h, class) cell coordinates from the flat index.
        w = ik % mw
        h = (ik // mw) % mh
        c = ik // (mw * mh)
        # Expectation over 16 softmaxed bins for each of the 4 box sides.
        xywh_ = xywh[0, :, h * mw + w].reshape(1,4,16,1)
        xywh_ = softmax(xywh_, axis=2)
        xywh_ = np.sum(xywh_ * ARANGE16, axis=2).reshape(-1)
        xy_add = xywh_[:2] + xywh_[2:]        # box width/height (cell units)
        xy_sub = (xywh_[2:] - xywh_[:2]) / 2  # center offset (cell units)
        # (value - 0.25) * pi maps the raw angle output to radians.
        angle = (angle_feature[index + h*mw + w] - 0.25) * math.pi
        cos_a, sin_a = math.cos(angle), math.sin(angle)
        xy_rot = np.array([
            xy_sub[0]*cos_a - xy_sub[1]*sin_a,
            xy_sub[0]*sin_a + xy_sub[1]*cos_a
        ])
        cx = (xy_rot[0] + w + 0.5) * stride
        cy = (xy_rot[1] + h + 0.5) * stride
        w_box = xy_add[0] * stride
        h_box = xy_add[1] * stride
        xmin = (cx - w_box/2) * scale_w
        ymin = (cy - h_box/2) * scale_h
        xmax = (cx + w_box/2) * scale_w
        ymax = (cy + h_box/2) * scale_h
        boxes.append(DetectBox(c, float(conf[ik]), xmin, ymin, xmax, ymax, float(angle)))
    return boxes
# ---------------- RKNN inference entry ----------------
def myFunc(rknn, frame, lock=None):
    """Rotated-box detection on *frame*; returns an annotated copy, or the
    original frame on failure (always an np.ndarray)."""
    try:
        # --- letterbox before inference ---
        img_resized, scale, offset_x, offset_y = letterbox_resize(frame, (640,640))
        infer_img = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
        infer_input = np.expand_dims(infer_img.astype(INPUT_DTYPE), 0)
        results = rknn.inference([infer_input])
        if not results or len(results) < 1:
            return frame
        outputs = []
        for x in results[:-1]:
            if x is None: continue
            # pick stride and angle-feature offset from the head's spatial size
            if x.shape[2] == 20:
                stride, index = 32, 20*4*20*4 + 20*2*20*2
            elif x.shape[2] == 40:
                stride, index = 16, 20*4*20*4
            elif x.shape[2] == 80:
                stride, index = 8, 0
            else:
                continue
            feature = x.reshape(1,65,-1)
            outputs += process(
                feature,
                x.shape[3],
                x.shape[2],
                stride,
                results[-1],
                index,
                1.0, 1.0  # keep decoded coordinates in the 640x640 letterbox space
            )
        if not outputs:
            return frame
        predbox = NMS(outputs)
        if len(predbox) < 2:
            return frame
        box1, box2 = sorted(predbox, key=lambda x: x.score, reverse=True)[:2]
        out_frame = frame.copy()
        # ========== map back to original-image coordinates ==========
        def restore_to_original(b):
            # Remove the letterbox padding offset, then undo the scale.
            xmin = int((b.xmin - offset_x) / scale)
            ymin = int((b.ymin - offset_y) / scale)
            xmax = int((b.xmax - offset_x) / scale)
            ymax = int((b.ymax - offset_y) / scale)
            return xmin, ymin, xmax, ymax
        for box in [box1, box2]:
            xmin, ymin, xmax, ymax = restore_to_original(box)
            if DRAW_BOX:
                # rotated-box corners (rotation must happen in original-image coords)
                pts = rotate_rectangle(xmin, ymin, xmax, ymax, box.angle)
                cv2.polylines(out_frame, [np.array(pts, np.int32)], True, (0,255,0), 2)
            if DRAW_SCORE:
                cv2.putText(
                    out_frame, f"{box.score:.2f}",
                    (xmin, max(10, ymin - 6)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,0), 2
                )
        return out_frame
    except Exception as e:
        print(f"[func ❌] 推理异常: {e}")
        return frame

View File

@ -0,0 +1,254 @@
# Minimal-change adaptation of the original yolov5 example.
import cv2
import numpy as np
OBJ_THRESH, NMS_THRESH, IMG_SIZE = 0.25, 0.45, 640
# NOTE(review): several COCO names below carry stray trailing spaces/tabs
# (e.g. "motorbike ", "laptop\t"); they are only used as display labels, so
# they are left untouched to avoid changing runtime strings.
CLASSES = ("person", "bicycle", "car", "motorbike ", "aeroplane ", "bus ", "train", "truck ", "boat", "traffic light",
           "fire hydrant", "stop sign ", "parking meter", "bench", "bird", "cat", "dog ", "horse ", "sheep", "cow", "elephant",
           "bear", "zebra ", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
           "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife ",
           "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza ", "donut", "cake", "chair", "sofa",
           "pottedplant", "bed", "diningtable", "toilet ", "tvmonitor", "laptop\t", "mouse\t", "remote ", "keyboard ", "cell phone", "microwave ",
           "oven ", "toaster", "sink", "refrigerator ", "book", "clock", "vase", "scissors ", "teddy bear ", "hair drier", "toothbrush ")
# ---------- Helpers kept from the original: process/filter/nms/xywh2xyxy ----------
def xywh2xyxy(x):
    """Convert (cx, cy, w, h) rows to (x1, y1, x2, y2) rows (copy, no mutation)."""
    y = np.copy(x)
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    y[:, 0] = x[:, 0] - half_w
    y[:, 1] = x[:, 1] - half_h
    y[:, 2] = x[:, 0] + half_w
    y[:, 3] = x[:, 1] + half_h
    return y
def process(input, mask, anchors):
    """Decode one YOLO head (grid_h, grid_w, 3, attrs) into boxes/conf/probs.

    Args:
        input: feature map of shape (grid_h, grid_w, 3, attrs), attrs >= 5+nc.
        mask: indices selecting this head's 3 anchors from *anchors*.
        anchors: list of (w, h) anchor pairs (unit anchors neutralize scaling).

    Returns (boxes_xywh, box_confidence, box_class_probs) in input-image pixels.
    """
    anchors = [anchors[i] for i in mask]
    grid_h, grid_w = map(int, input.shape[0:2])
    box_confidence = np.expand_dims(input[..., 4], axis=-1)
    box_class_probs = input[..., 5:]
    # YOLOv5-style xy decode (sigmoid assumed already applied upstream).
    box_xy = input[..., :2] * 2 - 0.5
    # Build the per-cell index grid.
    # BUGFIX: the old code tiled col grid_w times and row grid_h times, which
    # only works for square grids (and crashes on reshape otherwise); the tile
    # count must use the *other* dimension.
    col = np.tile(np.arange(0, grid_w), grid_h).reshape(-1, grid_w)
    row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_w)
    col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    grid = np.concatenate((col, row), axis=-1)
    box_xy += grid
    box_xy *= int(IMG_SIZE / grid_h)  # this head's pixel stride
    box_wh = pow(input[..., 2:4] * 2, 2)
    # Multiply by the selected anchors (unit anchors leave box_wh unchanged).
    box_wh = box_wh * anchors
    return np.concatenate((box_xy, box_wh), axis=-1), box_confidence, box_class_probs
def filter_boxes(boxes, box_confidences, box_class_probs):
    """Keep boxes whose objectness and best class probability clear OBJ_THRESH.

    Returns (boxes, class_ids, scores) with score = best_class_prob * objectness.
    """
    boxes = boxes.reshape(-1, 4)
    obj_conf = box_confidences.reshape(-1)
    class_probs = box_class_probs.reshape(-1, box_class_probs.shape[-1])
    keep_obj = np.where(obj_conf >= OBJ_THRESH)
    boxes = boxes[keep_obj]
    obj_conf = obj_conf[keep_obj]
    class_probs = class_probs[keep_obj]
    best_score = np.max(class_probs, axis=-1)
    best_class = np.argmax(class_probs, axis=-1)
    keep_cls = np.where(best_score >= OBJ_THRESH)
    return boxes[keep_cls], best_class[keep_cls], (best_score * obj_conf)[keep_cls]
def nms_boxes(boxes, scores):
    """Greedy IoU-based NMS on axis-aligned (x1, y1, x2, y2) boxes.

    Returns the indices of the kept boxes, highest score first.
    """
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    areas = widths * heights
    order = scores.argsort()[::-1]
    keep = []
    eps = 1e-7
    while order.size > 0:
        best = order[0]
        keep.append(best)
        rest = order[1:]
        ix1 = np.maximum(x1[best], x1[rest])
        iy1 = np.maximum(y1[best], y1[rest])
        ix2 = np.minimum(x1[best] + widths[best], x1[rest] + widths[rest])
        iy2 = np.minimum(y1[best] + heights[best], y1[rest] + heights[rest])
        iw = np.maximum(0.0, ix2 - ix1 + 1e-5)
        ih = np.maximum(0.0, iy2 - iy1 + 1e-5)
        inter = iw * ih
        union = np.maximum(areas[best] + areas[rest] - inter, eps)
        ovr = inter / union
        order = rest[np.where(ovr <= NMS_THRESH)[0]]
    return np.array(keep)
# ---------- Key step: stitch the 9 raw outputs into the input_data layout the yolov5 post-process expects ----------
def yolov11_to_yolov5_style_input(outputs):
    """
    outputs: list of 9 tensors (1, C, H, W) in this order per your print:
        [reg80, cls80, obj80, reg40, cls40, obj40, reg20, cls20, obj20]
    Each scale is converted to a (H, W, 3, 5+num_classes) array, repeating the
    same slice per anchor so the existing yolov5 post-process can be reused.
    Anchor scaling is neutralized later by passing unit (1x1) anchors.

    NOTE(review): reg/cls/obj are used as-is — no sigmoid is applied here, and
    only the first 4 of reg's 64 channels are read; confirm this matches the
    exported model's head layout.
    """
    input_data = []
    # scales: (indices and corresponding H,W from tensors)
    for i in range(0, 9, 3):
        reg = outputs[i][0] # (64, H, W)
        cls = outputs[i+1][0] # (80, H, W)
        obj = outputs[i+2][0] # (1, H, W)
        # find H,W from reg
        H = reg.shape[1]
        W = reg.shape[2]
        # xywh: assume first 4 channels of reg are x,y,w,h per cell
        xywh = reg[0:4, :, :] # (4, H, W)
        xywh = np.transpose(xywh, (1, 2, 0)) # (H, W, 4)
        # obj and cls to H,W,?  (the (0, 1) transpose is a no-op, kept as-is)
        obj_hw = np.transpose(obj[0, :, :], (0, 1)) # (H, W)
        cls_hw = np.transpose(cls, (1, 2, 0)) # (H, W, 80)
        # build one anchor slice: [x,y,w,h,obj, cls80] -> shape (H, W, 5+80)
        slice_hw = np.concatenate([xywh, obj_hw[..., None], cls_hw], axis=-1) # (H, W, 85)
        # repeat to make 3 anchors per cell (so shape becomes H,W,3,85)
        slice_3 = np.repeat(slice_hw[:, :, None, :], 3, axis=2) # (H, W, 3, 85)
        input_data.append(slice_3)
    return input_data
def yolov5_post_process_adapted(input_data):
    """Run the yolov5-style post-processing with neutral (1,1) anchors so
    box_wh is not rescaled incorrectly.

    input_data: list of 3 arrays shaped (H, W, 3, 85).
    Returns (boxes_xyxy, classes, scores), or (None, None, None) when nothing
    survives filtering/NMS.
    """
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]] # unused in anchors here, but kept for compatibility
    # use neutral anchors (1,1) to avoid scaling change
    anchors = [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1],
               [1, 1], [1, 1], [1, 1], [1, 1]]
    boxes, classes, scores = [], [], []
    # input_data already is list of 3 arrays shaped (H,W,3,85)
    for input in input_data:
        # process() expects shape (grid_h, grid_w, 3, attrs)
        b, c, s = process(input, [0,1,2], anchors) # mask and anchors values used inside process
        b, c, s = filter_boxes(b, c, s)
        boxes.append(b)
        classes.append(c)
        scores.append(s)
    # NOTE(review): this is never true — one (possibly empty) entry is
    # appended per scale above; kept as-is.
    if len(boxes) == 0:
        return None, None, None
    boxes = np.concatenate(boxes)
    boxes = xywh2xyxy(boxes)
    classes = np.concatenate(classes)
    scores = np.concatenate(scores)
    # nms per class
    nboxes, nclasses, nscores = [], [], []
    for cls_id in set(classes):
        inds = np.where(classes == cls_id)
        b = boxes[inds]
        c = classes[inds]
        s = scores[inds]
        keep = nms_boxes(b, s)
        nboxes.append(b[keep])
        nclasses.append(c[keep])
        nscores.append(s[keep])
    if not nclasses and not nscores:
        return None, None, None
    return np.concatenate(nboxes), np.concatenate(nclasses), np.concatenate(nscores)
# ---------- drawing helper ----------
def draw(image, boxes, scores, classes):
    """Draw labeled xyxy boxes on *image* in place.

    (The previous version named the coordinates top/left/right/bottom, but
    they are really x1/y1/x2/y2 — renamed for clarity; drawing unchanged.)
    """
    for box, score, cls_id in zip(boxes, scores, classes):
        x1, y1, x2, y2 = box
        x1 = int(x1)
        y1 = int(y1)
        cv2.rectangle(image, (x1, y1), (int(x2), int(y2)), (255, 0, 0), 2)
        label = '{0} {1:.2f}'.format(CLASSES[cls_id], score)
        cv2.putText(image, label,
                    (x1, y1 - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 2)
# ---------- Final myFunc (replaces the original myFunc) ----------
def myFunc(rknn_lite, IMG):
    """Detect cars (COCO class 2) in *IMG*; returns (annotated_image, ok_flag)."""
    # 1. BGR -> RGB
    img_rgb = cv2.cvtColor(IMG, cv2.COLOR_BGR2RGB)
    # 2. Resize -> 640x640
    img_resized = cv2.resize(img_rgb, (IMG_SIZE, IMG_SIZE))
    # 3. HWC -> CHW
    img_chw = img_resized.transpose(2, 0, 1)
    # 4. expand batch dimension
    img_input = np.expand_dims(img_chw, axis=0).astype(np.uint8)
    # ========= Inference =========
    outputs = rknn_lite.inference(inputs=[img_input])
    if outputs is None:
        print("⚠️ 推理失败")
        return IMG, False
    # ========= reshape heads =========
    # Three output heads (S = 80/40/20), reshaped to (3, attrs, H, W).
    head0 = outputs[0].reshape([3, -1] + list(outputs[0].shape[-2:]))  # (3,85,80,80)
    head1 = outputs[1].reshape([3, -1] + list(outputs[1].shape[-2:]))  # (3,85,40,40)
    head2 = outputs[2].reshape([3, -1] + list(outputs[2].shape[-2:]))  # (3,85,20,20)
    # Transpose to the (H, W, 3, attrs) layout the post-processing expects.
    input_data = [
        np.transpose(head0, (2, 3, 0, 1)),
        np.transpose(head1, (2, 3, 0, 1)),
        np.transpose(head2, (2, 3, 0, 1)),
    ]
    # ========= post-processing =========
    # BUGFIX: the original called yolov5_post_process(), which is not defined
    # anywhere in this module (NameError at runtime) — the adapted function
    # defined above is the one that exists.
    boxes, classes, scores = yolov5_post_process_adapted(input_data)
    # Keep only the "car" class (COCO id 2).
    if boxes is not None:
        keep = np.where(classes == 2)
        boxes = boxes[keep]
        scores = scores[keep]
        classes = classes[keep]
    # ========= draw =========
    img_out = IMG.copy()
    if boxes is not None and len(boxes) > 0:
        draw(img_out, boxes, scores, classes)
    return img_out, True

View File

@ -0,0 +1,54 @@
import cv2
import time
from rknnpool import rknnPoolExecutor
# Frame-processing function; replace for other deployments.
from func_cls5 import myFunc
# ---------------- User configuration ----------------
RTSP_URL = "rtsp://admin:XJ123456@192.168.250.61:554/streaming/channels/102"
modelPath = "./rknnModel/cls5.rknn"
# Open the video stream.
cap = cv2.VideoCapture(RTSP_URL, cv2.CAP_FFMPEG)
if not cap.isOpened():
    print("❌ 无法打开视频流")
    exit(0)
# Worker-thread count; raising it can increase throughput.
TPEs = 3
# Build the RKNN pool.
pool = rknnPoolExecutor(
    rknnModel=modelPath,
    TPEs=TPEs,
    func=myFunc)
# Pre-fill the pipeline so pool.get() always has a pending frame.
if cap.isOpened():
    for i in range(TPEs + 1):
        ret, frame = cap.read()
        if not ret:
            # BUGFIX: release the NPU runtimes too — the old code only did
            # `del pool`, leaking the RKNNLite instances on early exit.
            cap.release()
            pool.release()
            exit(-1)
        pool.put(frame)
frames, loopTime, initTime = 0, time.time(), time.time()
while cap.isOpened():
    frames += 1
    ret, frame = cap.read()
    if not ret:
        break
    pool.put(frame)
    frame, flag = pool.get()
    if not flag:
        break
    cv2.imshow('test', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    if frames % 30 == 0:
        print("30帧平均帧率:\t", 30 / (time.time() - loopTime), "帧")
        loopTime = time.time()
print("总平均帧率\t", frames / (time.time() - initTime))
# Release the capture and the RKNN thread pool.
cap.release()
cv2.destroyAllWindows()
pool.release()

View File

@ -0,0 +1,54 @@
import cv2
import time
from rknnpool import rknnPoolExecutor
# Frame-processing function; replace for other deployments.
from func_cls import myFunc
# ---------------- User configuration ----------------
RTSP_URL = "rtsp://admin:XJ123456@192.168.250.61:554/streaming/channels/102"
modelPath = "./rknnModel/cls.rknn"
# Open the video stream.
cap = cv2.VideoCapture(RTSP_URL, cv2.CAP_FFMPEG)
if not cap.isOpened():
    print("❌ 无法打开视频流")
    exit(0)
# Worker-thread count; raising it can increase throughput.
TPEs = 3
# Build the RKNN pool.
pool = rknnPoolExecutor(
    rknnModel=modelPath,
    TPEs=TPEs,
    func=myFunc)
# Pre-fill the pipeline so pool.get() always has a pending frame.
if cap.isOpened():
    for i in range(TPEs + 1):
        ret, frame = cap.read()
        if not ret:
            # BUGFIX: release the NPU runtimes too — the old code only did
            # `del pool`, leaking the RKNNLite instances on early exit.
            cap.release()
            pool.release()
            exit(-1)
        pool.put(frame)
frames, loopTime, initTime = 0, time.time(), time.time()
while cap.isOpened():
    frames += 1
    ret, frame = cap.read()
    if not ret:
        break
    pool.put(frame)
    frame, flag = pool.get()
    if not flag:
        break
    cv2.imshow('test', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    if frames % 30 == 0:
        print("30帧平均帧率:\t", 30 / (time.time() - loopTime), "帧")
        loopTime = time.time()
print("总平均帧率\t", frames / (time.time() - initTime))
# Release the capture and the RKNN thread pool.
cap.release()
cv2.destroyAllWindows()
pool.release()

View File

@ -0,0 +1,54 @@
import cv2
import time
from rknnpool import rknnPoolExecutor
# Frame-processing function; replace for other deployments.
from func_obb import myFunc
# ---------------- User configuration ----------------
RTSP_URL = "rtsp://admin:XJ123456@192.168.250.61:554/streaming/channels/102"
modelPath = "./rknnModel/obb.rknn"
# Open the video stream.
cap = cv2.VideoCapture(RTSP_URL, cv2.CAP_FFMPEG)
if not cap.isOpened():
    print("❌ 无法打开视频流")
    exit(0)
# Worker-thread count; raising it can increase throughput.
TPEs = 3
# Build the RKNN pool.
pool = rknnPoolExecutor(
    rknnModel=modelPath,
    TPEs=TPEs,
    func=myFunc)
# Pre-fill the pipeline so pool.get() always has a pending frame.
if cap.isOpened():
    for i in range(TPEs + 1):
        ret, frame = cap.read()
        if not ret:
            # BUGFIX: release the NPU runtimes too — the old code only did
            # `del pool`, leaking the RKNNLite instances on early exit.
            cap.release()
            pool.release()
            exit(-1)
        pool.put(frame)
frames, loopTime, initTime = 0, time.time(), time.time()
while cap.isOpened():
    frames += 1
    ret, frame = cap.read()
    if not ret:
        break
    pool.put(frame)
    frame, flag = pool.get()
    if not flag:
        break
    cv2.imshow('test', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    if frames % 30 == 0:
        print("30帧平均帧率:\t", 30 / (time.time() - loopTime), "帧")
        loopTime = time.time()
print("总平均帧率\t", frames / (time.time() - initTime))
# Release the capture and the RKNN thread pool.
cap.release()
cv2.destroyAllWindows()
pool.release()

View File

@ -0,0 +1,31 @@
# Run this script as root.
# NOTE(review): `sudo echo ... > file` performs the redirection as the
# *calling* user, not root.  That is harmless when the whole script already
# runs as root (as required above), but it fails under plain sudo;
# `echo ... | sudo tee file` would be the robust form.
# Pin CPU frequencies
echo "CPU0-3 可用频率:"
sudo cat /sys/devices/system/cpu/cpufreq/policy0/scaling_available_frequencies
sudo echo userspace > /sys/devices/system/cpu/cpufreq/policy0/scaling_governor
sudo echo 1800000 > /sys/devices/system/cpu/cpufreq/policy0/scaling_setspeed
echo "CPU0-3 当前频率:"
sudo cat /sys/devices/system/cpu/cpufreq/policy0/cpuinfo_cur_freq
echo "CPU4-5 可用频率:"
sudo cat /sys/devices/system/cpu/cpufreq/policy4/scaling_available_frequencies
sudo echo userspace > /sys/devices/system/cpu/cpufreq/policy4/scaling_governor
sudo echo 2400000 > /sys/devices/system/cpu/cpufreq/policy4/scaling_setspeed
echo "CPU4-5 当前频率:"
sudo cat /sys/devices/system/cpu/cpufreq/policy4/cpuinfo_cur_freq
echo "CPU6-7 可用频率:"
sudo cat /sys/devices/system/cpu/cpufreq/policy6/scaling_available_frequencies
sudo echo userspace > /sys/devices/system/cpu/cpufreq/policy6/scaling_governor
sudo echo 2400000 > /sys/devices/system/cpu/cpufreq/policy6/scaling_setspeed
echo "CPU6-7 当前频率:"
sudo cat /sys/devices/system/cpu/cpufreq/policy6/cpuinfo_cur_freq
# Pin NPU frequency
echo "NPU 可用频率:"
sudo cat /sys/class/devfreq/fdab0000.npu/available_frequencies
sudo echo userspace > /sys/class/devfreq/fdab0000.npu/governor
sudo echo 1000000000 > /sys/class/devfreq/fdab0000.npu/userspace/set_freq
echo "NPU 当前频率:"
sudo cat /sys/class/devfreq/fdab0000.npu/cur_freq

View File

@ -0,0 +1,5 @@
# Show board temperatures (lm-sensors)
sensors
# Show NPU utilization (debugfs; requires root)
echo "当前NPU占用:"
sudo cat /sys/kernel/debug/rknpu/load

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,59 @@
from queue import Queue
from rknnlite.api import RKNNLite
from concurrent.futures import ThreadPoolExecutor, as_completed
def initRKNN(rknnModel="./rknnModel/yolov5s.rknn", id=0):
    """Load an RKNN model and bind its runtime to the NPU core chosen by *id*.

    id 0/1/2 selects a single core, -1 all three cores; any other value lets
    the runtime decide.  Exits the process on load or init failure.
    """
    rknn_lite = RKNNLite()
    ret = rknn_lite.load_rknn(rknnModel)
    if ret != 0:
        print("Load RKNN rknnModel failed")
        exit(ret)
    core_masks = {
        0: RKNNLite.NPU_CORE_0,
        1: RKNNLite.NPU_CORE_1,
        2: RKNNLite.NPU_CORE_2,
        -1: RKNNLite.NPU_CORE_0_1_2,
    }
    if id in core_masks:
        ret = rknn_lite.init_runtime(core_mask=core_masks[id])
    else:
        ret = rknn_lite.init_runtime()
    if ret != 0:
        print("Init runtime environment failed")
        exit(ret)
    print(rknnModel, "\t\tdone")
    return rknn_lite
def initRKNNs(rknnModel="./rknnModel/yolov5s.rknn", TPEs=1):
    """Create TPEs RKNNLite instances, round-robining them over NPU cores 0-2."""
    return [initRKNN(rknnModel, i % 3) for i in range(TPEs)]
class rknnPoolExecutor():
    """Thread pool that fans frames out over several RKNNLite instances.

    Frames are submitted round-robin to TPEs runtime instances and results
    come back in submission order via the internal FIFO future queue.
    """
    def __init__(self, rknnModel, TPEs, func):
        # func(rknn_lite, frame) -> result, called on a worker thread
        self.TPEs = TPEs
        self.queue = Queue()  # FIFO of pending futures, preserves frame order
        self.rknnPool = initRKNNs(rknnModel, TPEs)
        self.pool = ThreadPoolExecutor(max_workers=TPEs)
        self.func = func
        self.num = 0  # total frames submitted; selects the next instance
    def put(self, frame):
        # Submit *frame* asynchronously on the next runtime instance.
        self.queue.put(self.pool.submit(
            self.func, self.rknnPool[self.num % self.TPEs], frame))
        self.num += 1
    def get(self):
        # Returns (result, True) for the oldest pending frame, blocking until
        # it finishes, or (None, False) when nothing is pending.
        if self.queue.empty():
            return None, False
        fut = self.queue.get()
        return fut.result(), True
    def release(self):
        # Shut down the thread pool, then release every RKNN runtime.
        self.pool.shutdown()
        for rknn_lite in self.rknnPool:
            rknn_lite.release()