Files
SupervisorAI/npu_yolo_onnx_yolo11n.py

169 lines
6.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# File: npu_yolo_onnx_yolo11n.py
import cv2
import numpy as np
import onnxruntime as ort
import os
import time
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114)):
    """Resize *img* to fit inside *new_shape* keeping aspect ratio, then pad.

    Args:
        img: Image array whose first two dimensions are (height, width).
        new_shape: Target size as (height, width). A single int is accepted
            and means a square target of that side length.
        color: Border value handed to ``cv2.copyMakeBorder``.

    Returns:
        A tuple ``(padded_img, r, (dw, dh))`` where ``r`` is the scale factor
        applied to the input and ``dw`` / ``dh`` are the per-side paddings
        along width / height. ``dw`` and ``dh`` may be fractional (x.5) when
        the total padding is odd.

    Raises:
        ValueError: If the input image has zero height or zero width.
    """
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    shape = img.shape[:2]  # (h, w)
    if shape[0] == 0 or shape[1] == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError below.
        raise ValueError(f"letterbox() received an empty image of shape {img.shape}")

    # Single scale factor so that the image fits in both dimensions.
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

    # cv2 sizes are (w, h). Clamp to 1 px so extreme aspect ratios do not
    # round down to a zero-sized resize target.
    new_unpad = (
        max(1, int(round(shape[1] * r))),
        max(1, int(round(shape[0] * r))),
    )

    # Total padding, split evenly between the two sides.
    dw = (new_shape[1] - new_unpad[0]) / 2
    dh = (new_shape[0] - new_unpad[1]) / 2

    if shape[::-1] != new_unpad:
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The -0.1 / +0.1 nudges make an odd total padding split as
    # (n, n + 1): the extra pixel goes to the bottom / right side.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(
        img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
    )
    return img, r, (dw, dh)
class YOLOv8_ONNX:
    """YOLO person detector backed by an ONNX Runtime inference session.

    Calling an instance on a BGR frame runs preprocess -> inference ->
    postprocess and returns a list of
    ``[x1, y1, x2, y2, confidence, class_id]`` entries (class 0 only), with
    coordinates expressed in pixels of the original frame.
    """

    def __init__(
        self,
        onnx_path,
        conf_threshold=0.25,
        iou_threshold=0.45,
        preprocess_size_1=640,
        preprocess_size_2=640,
    ):
        """Create the inference session and store detection settings.

        Args:
            onnx_path: Path of the ONNX model handed to ``ort.InferenceSession``.
            conf_threshold: Minimum class score for a candidate to be kept.
            iou_threshold: IoU threshold passed to non-maximum suppression.
            preprocess_size_1: Letterbox target height.
            preprocess_size_2: Letterbox target width.
        """
        # Providers in priority order: CANN first, then CUDA, then CPU.
        providers = [
            ("CANNExecutionProvider", {
                "device_id": 0,
                "arena_extend_strategy": "kNextPowerOfTwo",
                "npu_mem_limit": 16 * 1024 * 1024 * 1024,
                # Original author's note: use "must_keep_origin_dtype" here
                # to avoid lowering precision.
                "precision_mode": "allow_fp32_to_fp16",
                "op_select_impl_mode": "high_precision",
                "enable_cann_graph": True,
            }),
            "CUDAExecutionProvider",
            "CPUExecutionProvider",  # automatic fallback
        ]

        # Per the original author's note, ONNX Runtime skips execution
        # providers that are not available instead of raising
        # (TODO confirm for the deployed ORT version).
        self.session = ort.InferenceSession(onnx_path, providers=providers)

        # Report which provider(s) the session actually uses.
        actual_providers = self.session.get_providers()
        print("YOLO Providers:", actual_providers)
        if "CANNExecutionProvider" in actual_providers:
            print("[INFO] YOLO 使用 CANNExecutionProvider昇腾")
        else:
            # Name the provider really in use: the fallback may be CUDA, not
            # only CPU. Output is unchanged when CPU is the active provider.
            active = actual_providers[0] if actual_providers else "CPUExecutionProvider"
            print(f"[INFO] YOLO 使用 {active}非昇腾环境")

        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.input_name = self.session.get_inputs()[0].name
        self.preprocess_size_1 = preprocess_size_1
        self.preprocess_size_2 = preprocess_size_2
        print(f"YOLO模型输入名称: {self.input_name}")
        print(f"YOLO模型输入形状: {self.session.get_inputs()[0].shape}")
        print(f"YOLO模型输出形状: {self.session.get_outputs()[0].shape}")

    def preprocess(self, img):
        """Letterbox a BGR image and convert it to a normalized NCHW tensor.

        Side effects: stores ``orig_shape``, ``ratio``, ``dw`` and ``dh`` on
        the instance; ``postprocess_v8`` reads ``ratio`` / ``dw`` / ``dh`` to
        map boxes back to the original image.

        Returns:
            A float32 array of shape (1, 3, H, W) with values in [0, 1].
        """
        self.orig_shape = img.shape[:2]
        img, self.ratio, (self.dw, self.dh) = letterbox(
            img, (self.preprocess_size_1, self.preprocess_size_2)
        )
        # Debug hint: the letterboxed image is still BGR at this point, so it
        # can be dumped here with cv2.imwrite before the colour conversion.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.transpose(2, 0, 1).astype(np.float32)  # HWC -> CHW
        img /= 255.0
        img = np.expand_dims(img, axis=0)  # add the batch dimension
        # Hand the runtime a C-contiguous buffer; values are unchanged.
        return np.ascontiguousarray(img)

    @staticmethod
    def _xyxy_to_xywh(bboxes):
        """Convert (x1, y1, x2, y2) rows to (x, y, w, h) rows."""
        rects = np.array(bboxes, dtype=np.float64)
        rects[:, 2:] -= rects[:, :2]
        return rects

    def _decode_person_boxes(self, pred, im0_shape):
        """Return (xyxy boxes in original-image pixels, scores) for class 0."""
        x = pred[0].T  # one row per candidate: 4 box values, then class scores
        scores = x[:, 4:]
        class_ids = np.argmax(scores, axis=1)
        conf = scores[np.arange(len(scores)), class_ids]

        # Keep only candidates whose best class is 0 (person in COCO).
        mask = (class_ids == 0) & (conf > self.conf_threshold)
        if not mask.any():
            return np.empty((0, 4), dtype=np.float32), np.empty((0,), dtype=np.float32)

        boxes = x[:, :4][mask]
        conf = conf[mask]

        # Center-based xywh -> corner-based xyxy.
        half_w = boxes[:, 2] / 2
        half_h = boxes[:, 3] / 2
        x1 = boxes[:, 0] - half_w
        y1 = boxes[:, 1] - half_h
        x2 = boxes[:, 0] + half_w
        y2 = boxes[:, 1] + half_h

        # Undo the letterbox (remove padding, then rescale) and clamp the
        # result to the original image bounds.
        x1 = np.clip((x1 - self.dw) / self.ratio, 0, im0_shape[1])
        y1 = np.clip((y1 - self.dh) / self.ratio, 0, im0_shape[0])
        x2 = np.clip((x2 - self.dw) / self.ratio, 0, im0_shape[1])
        y2 = np.clip((y2 - self.dh) / self.ratio, 0, im0_shape[0])

        return np.stack([x1, y1, x2, y2], axis=1), conf

    def postprocess_v8(self, pred, im0_shape):
        """Turn raw model output into NMS-filtered person detections.

        Args:
            pred: Model output whose first element, transposed, has one row
                per candidate (4 box values followed by class scores).
            im0_shape: Shape of the original frame; index 0 is height and
                index 1 is width.

        Returns:
            A list of ``[x1, y1, x2, y2, confidence, 0]`` entries with integer
            pixel coordinates; an empty list when nothing passes the filters.
        """
        bboxes, conf = self._decode_person_boxes(pred, im0_shape)
        if len(conf) == 0:
            return []

        # cv2.dnn.NMSBoxes takes rectangles as (x, y, w, h); feeding it xyxy
        # corners would make it compute IoU on the wrong rectangles.
        indices = cv2.dnn.NMSBoxes(
            self._xyxy_to_xywh(bboxes).tolist(),
            conf.tolist(),
            self.conf_threshold,
            self.iou_threshold,
        )
        if len(indices) == 0:
            return []

        # The return type varies across OpenCV versions (tuple, Nx1 or flat
        # array), so normalize it to a flat index array.
        keep = np.asarray(indices).reshape(-1)
        return [
            [
                int(bboxes[i, 0]),
                int(bboxes[i, 1]),
                int(bboxes[i, 2]),
                int(bboxes[i, 3]),
                float(conf[i]),
                0,  # person
            ]
            for i in keep
        ]

    def __call__(self, frame):
        """Run the full detection pipeline on one BGR frame.

        The per-stage durations of the most recent call are stored in
        ``self.last_timing_ms`` (keys: ``pre``, ``inf``, ``post``, ``total``).

        Returns:
            The list produced by ``postprocess_v8``.
        """
        t_start = time.perf_counter()
        input_data = self.preprocess(frame)
        t_pre = time.perf_counter()

        pred = self.session.run(None, {self.input_name: input_data})[0]
        t_inf = time.perf_counter()

        results = self.postprocess_v8(pred, frame.shape)
        t_post = time.perf_counter()

        pre_ms = (t_pre - t_start) * 1000
        inf_ms = (t_inf - t_pre) * 1000
        post_ms = (t_post - t_inf) * 1000
        # Keep the measurements available to callers instead of discarding them.
        self.last_timing_ms = {
            "pre": pre_ms,
            "inf": inf_ms,
            "post": post_ms,
            "total": pre_ms + inf_ms + post_ms,
        }
        return results