Files
SupervisorAI/npu_yolo_onnx_person_car_phone.py
2026-01-09 13:32:49 +08:00

153 lines
5.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# File: npu_yolo_onnx.py
import cv2
import numpy as np
import onnxruntime as ort
import os
import time
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114)):
    """Resize ``img`` to fit ``new_shape`` while keeping aspect ratio, padding the rest.

    Returns the padded image, the applied scale factor, and the (dw, dh)
    half-padding so callers can map detections back to the original image.
    """
    src_h, src_w = img.shape[:2]
    scale = min(new_shape[0] / src_h, new_shape[1] / src_w)
    fit_w = int(round(src_w * scale))
    fit_h = int(round(src_h * scale))
    # Split the leftover space evenly between the two sides of each axis.
    pad_w = (new_shape[1] - fit_w) / 2
    pad_h = (new_shape[0] - fit_h) / 2
    if (src_w, src_h) != (fit_w, fit_h):
        img = cv2.resize(img, (fit_w, fit_h), interpolation=cv2.INTER_LINEAR)
    # The +/-0.1 rounding reproduces Ultralytics' split of an odd padding pixel.
    top = int(round(pad_h - 0.1))
    bottom = int(round(pad_h + 0.1))
    left = int(round(pad_w - 0.1))
    right = int(round(pad_w + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return img, scale, (pad_w, pad_h)
class YOLOv8_ONNX:
    """YOLOv8 detector driven by onnxruntime with CANN -> CUDA -> CPU fallback."""

    def __init__(self, onnx_path, conf_threshold=0.25, iou_threshold=0.45, input_size=640):
        """Create an inference session for ``onnx_path`` and record the model I/O layout.

        Args:
            onnx_path: path to the exported YOLOv8 ONNX model.
            conf_threshold: minimum class confidence to keep a detection.
            iou_threshold: IoU threshold used during NMS.
            input_size: model input resolution, an int (square) or (h, w) pair.
        """
        provider_prefs = [
            ("CANNExecutionProvider", {
                "device_id": 0,
                "arena_extend_strategy": "kNextPowerOfTwo",
                "npu_mem_limit": 16 * 1024 * 1024 * 1024,
                "precision_mode": "allow_fp32_to_fp16",
                "op_select_impl_mode": "high_precision",
                "enable_cann_graph": True,
            }),
            "CUDAExecutionProvider",
            "CPUExecutionProvider",
        ]
        self.session = ort.InferenceSession(onnx_path, providers=provider_prefs)
        # Report which provider onnxruntime actually selected.
        active = self.session.get_providers()
        print("YOLO Providers:", active)
        if "CANNExecutionProvider" in active:
            print("[INFO] YOLO 使用 CANNExecutionProvider昇腾 NPU")
        elif "CUDAExecutionProvider" in active:
            print("[INFO] YOLO 使用 CUDAExecutionProviderNVIDIA GPU")
        else:
            print("[INFO] YOLO 使用 CPUExecutionProvider")
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.input_name = self.session.get_inputs()[0].name
        if isinstance(input_size, int):
            self.input_size = (input_size, input_size)
        else:
            self.input_size = input_size
        print(f"模型输入名称: {self.input_name}")
        print(f"模型输入形状: {self.session.get_inputs()[0].shape}")
        print(f"模型输出形状: {self.session.get_outputs()[0].shape}")

    def preprocess(self, img):
        """Letterbox a BGR frame, convert to RGB CHW float32 in [0, 1], add a batch dim.

        Side effect: stores the letterbox ratio/padding on ``self`` so that
        ``postprocess`` can undo the transform.
        """
        self.orig_shape = img.shape[:2]
        boxed, self.ratio, (self.dw, self.dh) = letterbox(img, self.input_size)
        rgb = cv2.cvtColor(boxed, cv2.COLOR_BGR2RGB)
        chw = rgb.transpose(2, 0, 1).astype(np.float32) / 255.0
        return chw[np.newaxis, ...]

    def postprocess(self, pred, im0_shape):
        """Decode raw model output into ``[x1, y1, x2, y2, conf, cls_id]`` rows.

        Applies class-aware NMS via the coordinate-offset trick so boxes of
        different classes (e.g. a car and a person) never suppress each other.
        """
        # [1, 4+num_cls, 8400] -> [8400, 4+num_cls]
        rows = pred[0].T
        raw_boxes = rows[:, :4]   # cx, cy, w, h in letterboxed coordinates
        cls_scores = rows[:, 4:]
        conf = cls_scores.max(axis=1)
        cls_ids = cls_scores.argmax(axis=1)
        # Drop everything below the confidence floor up front.
        keep = conf > self.conf_threshold
        if not keep.any():
            return []
        raw_boxes = raw_boxes[keep]
        conf = conf[keep]
        cls_ids = cls_ids[keep]
        # Undo the letterbox transform: remove padding, then rescale.
        cx = (raw_boxes[:, 0] - self.dw) / self.ratio
        cy = (raw_boxes[:, 1] - self.dh) / self.ratio
        w = raw_boxes[:, 2] / self.ratio
        h = raw_boxes[:, 3] / self.ratio
        # Center format -> top-left x/y plus width/height (kept for output).
        xywh = np.stack([cx - w / 2, cy - h / 2, w, h], axis=1)
        # Class-aware NMS: shift each class into its own disjoint coordinate
        # region so cross-class boxes can never overlap during suppression.
        max_wh = 4096  # any value larger than the image resolution works
        shift = cls_ids * max_wh
        nms_boxes = xywh.copy()
        nms_boxes[:, 0] += shift
        nms_boxes[:, 1] += shift
        picked = cv2.dnn.NMSBoxes(
            nms_boxes.tolist(),
            conf.tolist(),
            self.conf_threshold,
            self.iou_threshold,
        )
        detections = []
        if len(picked) > 0:
            for i in picked.flatten():
                # Read from the unshifted boxes for the real coordinates.
                bx, by, bw, bh = xywh[i]
                # Convert to x1/y1/x2/y2 clamped to the original frame bounds.
                detections.append([
                    float(np.clip(bx, 0, im0_shape[1])),
                    float(np.clip(by, 0, im0_shape[0])),
                    float(np.clip(bx + bw, 0, im0_shape[1])),
                    float(np.clip(by + bh, 0, im0_shape[0])),
                    float(conf[i]),
                    int(cls_ids[i]),
                ])
        return detections

    def __call__(self, frame):
        """Run the full detection pipeline on one BGR frame and return box rows."""
        blob = self.preprocess(frame)
        raw = self.session.run(None, {self.input_name: blob})[0]
        return self.postprocess(raw, frame.shape[:2])