引入yolo监狱识别代码
This commit is contained in:
153
npu_yolo_onnx_person_car_phone.py
Normal file
153
npu_yolo_onnx_person_car_phone.py
Normal file
@@ -0,0 +1,153 @@
|
||||
# 文件名: npu_yolo_onnx.py
|
||||
import cv2
|
||||
import numpy as np
|
||||
import onnxruntime as ort
|
||||
import os
|
||||
import time
|
||||
|
||||
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114)):
|
||||
shape = img.shape[:2] # h, w
|
||||
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
|
||||
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
|
||||
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
|
||||
dw /= 2
|
||||
dh /= 2
|
||||
if shape[::-1] != new_unpad:
|
||||
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
|
||||
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
|
||||
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
|
||||
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
|
||||
return img, r, (dw, dh)
|
||||
|
||||
class YOLOv8_ONNX:
    """YOLOv8 detector running through ONNX Runtime.

    Prefers the Ascend CANN execution provider, falling back to CUDA and
    then CPU. Instances are callable: ``detector(frame)`` returns a list of
    ``[x1, y1, x2, y2, conf, class_id]`` detections in original-image pixels.
    """

    def __init__(self, onnx_path, conf_threshold=0.25, iou_threshold=0.45, input_size=640):
        """Load the ONNX model and record pre/post-processing settings.

        Args:
            onnx_path: Path to the YOLOv8 ONNX model file.
            conf_threshold: Minimum class confidence to keep a detection.
            iou_threshold: IoU threshold used by NMS.
            input_size: Network input size; an int means a square input.
        """
        # Provider priority list: CANN (Ascend NPU) first, then CUDA, then CPU.
        # ONNX Runtime silently skips providers that are unavailable.
        providers = [("CANNExecutionProvider", {
            "device_id": 0,
            "arena_extend_strategy": "kNextPowerOfTwo",
            # 16 GiB NPU memory cap for the arena allocator.
            "npu_mem_limit": 16 * 1024 * 1024 * 1024,
            "precision_mode": "allow_fp32_to_fp16",
            "op_select_impl_mode": "high_precision",
            "enable_cann_graph": True,
        }),
            "CUDAExecutionProvider",
            "CPUExecutionProvider",
        ]

        self.session = ort.InferenceSession(onnx_path, providers=providers)
        # Report which provider actually got selected at runtime.
        actual_providers = self.session.get_providers()
        print("YOLO Providers:", actual_providers)

        if "CANNExecutionProvider" in actual_providers:
            print("[INFO] YOLO 使用 CANNExecutionProvider(昇腾 NPU)")
        elif 'CUDAExecutionProvider' in actual_providers:
            print("[INFO] YOLO 使用 CUDAExecutionProvider(NVIDIA GPU)")
        else:
            print("[INFO] YOLO 使用 CPUExecutionProvider")

        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.input_name = self.session.get_inputs()[0].name
        # Normalize an int size to a (w, h)/(h, w) square tuple.
        self.input_size = (input_size, input_size) if isinstance(input_size, int) else input_size

        print(f"模型输入名称: {self.input_name}")
        print(f"模型输入形状: {self.session.get_inputs()[0].shape}")
        print(f"模型输出形状: {self.session.get_outputs()[0].shape}")

    def preprocess(self, img):
        """Letterbox, convert BGR→RGB, scale to [0,1], and add a batch dim.

        Side effect: stores ``orig_shape``, ``ratio``, ``dw``, ``dh`` on the
        instance so ``postprocess`` can map boxes back to the original image.
        Returns a float32 NCHW array of shape (1, 3, H, W).
        """
        self.orig_shape = img.shape[:2]
        img, self.ratio, (self.dw, self.dh) = letterbox(img, self.input_size)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # HWC → CHW for the network's expected layout.
        img = img.transpose(2, 0, 1).astype(np.float32)
        img /= 255.0
        img = np.expand_dims(img, axis=0)
        return img

    def postprocess(self, pred, im0_shape):
        """Decode raw model output into final detections.

        Args:
            pred: Raw network output; assumed layout [1, 4+num_classes, N]
                  (YOLOv8 head) — TODO confirm against the exported model.
            im0_shape: Original image (h, w) used for clipping boxes.

        Returns:
            List of ``[x1, y1, x2, y2, conf, class_id]``; empty list when
            nothing passes the confidence threshold or NMS.
        """
        # 1. Transpose: [1, 4+cls, 8400] -> [8400, 4+cls]
        pred = pred[0].T

        # 2. Split box geometry from per-class scores.
        boxes = pred[:, :4]  # cx, cy, w, h
        scores = pred[:, 4:]

        # 3. Best class and its confidence for each candidate box.
        conf = np.max(scores, axis=1)
        class_pred = np.argmax(scores, axis=1)

        # 4. Initial confidence filtering.
        mask = conf > self.conf_threshold
        if not mask.any():
            return []

        boxes = boxes[mask]
        conf = conf[mask]
        class_pred = class_pred[mask]

        # =========================================================
        # Undo the letterbox transform (coordinates back to original image)
        # =========================================================
        boxes[:, 0] = (boxes[:, 0] - self.dw) / self.ratio  # cx
        boxes[:, 1] = (boxes[:, 1] - self.dh) / self.ratio  # cy
        boxes[:, 2] = boxes[:, 2] / self.ratio  # w
        boxes[:, 3] = boxes[:, 3] / self.ratio  # h

        # Convert format: Center(cx,cy) -> TopLeft(x,y)
        x = boxes[:, 0] - boxes[:, 2] / 2
        y = boxes[:, 1] - boxes[:, 3] / 2
        w = boxes[:, 2]
        h = boxes[:, 3]

        # Unshifted boxes (used for the final output).
        bboxes_original = np.stack([x, y, w, h], axis=1)

        # =========================================================
        # Class-aware NMS via the offset trick:
        # shift each class's boxes by a distinct large offset so boxes of
        # different classes can never overlap, preventing e.g. a "car" box
        # from suppressing a "person" box during NMS.
        # =========================================================
        max_wh = 4096  # any value larger than the maximum image dimension
        class_offset = class_pred * max_wh

        # NMS-only box coordinates (with the class offset applied).
        bboxes_for_nms = bboxes_original.copy()
        bboxes_for_nms[:, 0] += class_offset
        bboxes_for_nms[:, 1] += class_offset

        # =========================================================
        # Run NMS (cv2.dnn.NMSBoxes expects [x, y, w, h] boxes)
        # =========================================================
        indices = cv2.dnn.NMSBoxes(
            bboxes_for_nms.tolist(),
            conf.tolist(),
            self.conf_threshold,
            self.iou_threshold
        )

        result = []
        if len(indices) > 0:
            indices = indices.flatten()
            for i in indices:
                # NOTE: read from bboxes_original here (no class offset).
                bx, by, bw, bh = bboxes_original[i]

                # Convert back to x1, y1, x2, y2 (clipped) for downstream drawing.
                x1 = np.clip(bx, 0, im0_shape[1])
                y1 = np.clip(by, 0, im0_shape[0])
                x2 = np.clip(bx + bw, 0, im0_shape[1])
                y2 = np.clip(by + bh, 0, im0_shape[0])

                result.append([
                    float(x1),
                    float(y1),
                    float(x2),
                    float(y2),
                    float(conf[i]),
                    int(class_pred[i])
                ])
        return result

    def __call__(self, frame):
        """Run the full pipeline on one BGR frame and return detections."""
        input_data = self.preprocess(frame)
        pred = self.session.run(None, {self.input_name: input_data})[0]
        results = self.postprocess(pred, frame.shape[:2])
        return results
|
||||
Reference in New Issue
Block a user