引入yolo监狱识别代码
This commit is contained in:
153
npu_yolo_onnx_person_car_phone.py
Normal file
153
npu_yolo_onnx_person_car_phone.py
Normal file
@@ -0,0 +1,153 @@
|
||||
# 文件名: npu_yolo_onnx.py
|
||||
import cv2
|
||||
import numpy as np
|
||||
import onnxruntime as ort
|
||||
import os
|
||||
import time
|
||||
|
||||
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114)):
|
||||
shape = img.shape[:2] # h, w
|
||||
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
|
||||
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
|
||||
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
|
||||
dw /= 2
|
||||
dh /= 2
|
||||
if shape[::-1] != new_unpad:
|
||||
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
|
||||
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
|
||||
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
|
||||
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
|
||||
return img, r, (dw, dh)
|
||||
|
||||
class YOLOv8_ONNX:
    """YOLOv8 detector running through ONNX Runtime.

    Prefers the Ascend CANN execution provider, falling back to CUDA and
    then CPU. Instances are callable: ``detector(frame)`` returns a list of
    ``[x1, y1, x2, y2, conf, class_id]`` detections in original-image pixels.
    """

    def __init__(self, onnx_path, conf_threshold=0.25, iou_threshold=0.45, input_size=640):
        """Load the ONNX model and record pre/post-processing settings.

        Args:
            onnx_path: Path to the YOLOv8 ONNX model file.
            conf_threshold: Minimum class confidence to keep a detection.
            iou_threshold: IoU threshold used by NMS.
            input_size: Network input size; an int means a square input.
        """
        # Provider priority list: CANN (Ascend NPU) first, then CUDA, then CPU.
        # ONNX Runtime silently skips providers that are unavailable.
        providers = [("CANNExecutionProvider", {
            "device_id": 0,
            "arena_extend_strategy": "kNextPowerOfTwo",
            # 16 GiB NPU memory cap for the arena allocator.
            "npu_mem_limit": 16 * 1024 * 1024 * 1024,
            "precision_mode": "allow_fp32_to_fp16",
            "op_select_impl_mode": "high_precision",
            "enable_cann_graph": True,
        }),
            "CUDAExecutionProvider",
            "CPUExecutionProvider",
        ]

        self.session = ort.InferenceSession(onnx_path, providers=providers)
        # Report which provider actually got selected at runtime.
        actual_providers = self.session.get_providers()
        print("YOLO Providers:", actual_providers)

        if "CANNExecutionProvider" in actual_providers:
            print("[INFO] YOLO 使用 CANNExecutionProvider(昇腾 NPU)")
        elif 'CUDAExecutionProvider' in actual_providers:
            print("[INFO] YOLO 使用 CUDAExecutionProvider(NVIDIA GPU)")
        else:
            print("[INFO] YOLO 使用 CPUExecutionProvider")

        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.input_name = self.session.get_inputs()[0].name
        # Normalize an int size to a (w, h)/(h, w) square tuple.
        self.input_size = (input_size, input_size) if isinstance(input_size, int) else input_size

        print(f"模型输入名称: {self.input_name}")
        print(f"模型输入形状: {self.session.get_inputs()[0].shape}")
        print(f"模型输出形状: {self.session.get_outputs()[0].shape}")

    def preprocess(self, img):
        """Letterbox, convert BGR→RGB, scale to [0,1], and add a batch dim.

        Side effect: stores ``orig_shape``, ``ratio``, ``dw``, ``dh`` on the
        instance so ``postprocess`` can map boxes back to the original image.
        Returns a float32 NCHW array of shape (1, 3, H, W).
        """
        self.orig_shape = img.shape[:2]
        img, self.ratio, (self.dw, self.dh) = letterbox(img, self.input_size)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # HWC → CHW for the network's expected layout.
        img = img.transpose(2, 0, 1).astype(np.float32)
        img /= 255.0
        img = np.expand_dims(img, axis=0)
        return img

    def postprocess(self, pred, im0_shape):
        """Decode raw model output into final detections.

        Args:
            pred: Raw network output; assumed layout [1, 4+num_classes, N]
                  (YOLOv8 head) — TODO confirm against the exported model.
            im0_shape: Original image (h, w) used for clipping boxes.

        Returns:
            List of ``[x1, y1, x2, y2, conf, class_id]``; empty list when
            nothing passes the confidence threshold or NMS.
        """
        # 1. Transpose: [1, 4+cls, 8400] -> [8400, 4+cls]
        pred = pred[0].T

        # 2. Split box geometry from per-class scores.
        boxes = pred[:, :4]  # cx, cy, w, h
        scores = pred[:, 4:]

        # 3. Best class and its confidence for each candidate box.
        conf = np.max(scores, axis=1)
        class_pred = np.argmax(scores, axis=1)

        # 4. Initial confidence filtering.
        mask = conf > self.conf_threshold
        if not mask.any():
            return []

        boxes = boxes[mask]
        conf = conf[mask]
        class_pred = class_pred[mask]

        # =========================================================
        # Undo the letterbox transform (coordinates back to original image)
        # =========================================================
        boxes[:, 0] = (boxes[:, 0] - self.dw) / self.ratio  # cx
        boxes[:, 1] = (boxes[:, 1] - self.dh) / self.ratio  # cy
        boxes[:, 2] = boxes[:, 2] / self.ratio  # w
        boxes[:, 3] = boxes[:, 3] / self.ratio  # h

        # Convert format: Center(cx,cy) -> TopLeft(x,y)
        x = boxes[:, 0] - boxes[:, 2] / 2
        y = boxes[:, 1] - boxes[:, 3] / 2
        w = boxes[:, 2]
        h = boxes[:, 3]

        # Unshifted boxes (used for the final output).
        bboxes_original = np.stack([x, y, w, h], axis=1)

        # =========================================================
        # Class-aware NMS via the offset trick:
        # shift each class's boxes by a distinct large offset so boxes of
        # different classes can never overlap, preventing e.g. a "car" box
        # from suppressing a "person" box during NMS.
        # =========================================================
        max_wh = 4096  # any value larger than the maximum image dimension
        class_offset = class_pred * max_wh

        # NMS-only box coordinates (with the class offset applied).
        bboxes_for_nms = bboxes_original.copy()
        bboxes_for_nms[:, 0] += class_offset
        bboxes_for_nms[:, 1] += class_offset

        # =========================================================
        # Run NMS (cv2.dnn.NMSBoxes expects [x, y, w, h] boxes)
        # =========================================================
        indices = cv2.dnn.NMSBoxes(
            bboxes_for_nms.tolist(),
            conf.tolist(),
            self.conf_threshold,
            self.iou_threshold
        )

        result = []
        if len(indices) > 0:
            indices = indices.flatten()
            for i in indices:
                # NOTE: read from bboxes_original here (no class offset).
                bx, by, bw, bh = bboxes_original[i]

                # Convert back to x1, y1, x2, y2 (clipped) for downstream drawing.
                x1 = np.clip(bx, 0, im0_shape[1])
                y1 = np.clip(by, 0, im0_shape[0])
                x2 = np.clip(bx + bw, 0, im0_shape[1])
                y2 = np.clip(by + bh, 0, im0_shape[0])

                result.append([
                    float(x1),
                    float(y1),
                    float(x2),
                    float(y2),
                    float(conf[i]),
                    int(class_pred[i])
                ])
        return result

    def __call__(self, frame):
        """Run the full pipeline on one BGR frame and return detections."""
        input_data = self.preprocess(frame)
        pred = self.session.run(None, {self.input_name: input_data})[0]
        results = self.postprocess(pred, frame.shape[:2])
        return results
|
||||
Reference in New Issue
Block a user