引入rtsp及动作识别等

2025-12-21 19:37:32 +08:00
parent 3cbaf67765
commit 51bf38f84c
2 changed files with 1244 additions and 0 deletions
--- a/src/npu_yolo_onnx_yolo11n.py
+++ b/src/npu_yolo_onnx_yolo11n.py
@@ -0,0 +1,169 @@
+# File: npu_yolo_onnx_yolo11n.py
+import cv2
+import numpy as np
+import onnxruntime as ort
+import os
+import time
+
+
+def letterbox(img, new_shape=(640, 640), color=(114, 114, 114)):
+    shape = img.shape[:2]  # h, w
+    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
+    dw /= 2
+    dh /= 2
+    if shape[::-1] != new_unpad:
+        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
+    return img, r, (dw, dh)
+
+
+class YOLOv8_ONNX:
+    def __init__(self, onnx_path, conf_threshold=0.25, iou_threshold=0.45, preprocess_size_1=640,preprocess_size_2=640):
+        # 使用 CANNExecutionProvider
+        providers = [("CANNExecutionProvider", {
+            "device_id": 0,
+            "arena_extend_strategy": "kNextPowerOfTwo",
+            "npu_mem_limit": 16 * 1024 * 1024 * 1024,
+            "precision_mode": "allow_fp32_to_fp16",  # 修改：不降精度:must_keep_origin_dtype
+            "op_select_impl_mode": "high_precision",
+            "enable_cann_graph": True,
+            }),
+            "CUDAExecutionProvider",
+            "CPUExecutionProvider",  # 自动 fallback
+
+        ]
+
+        # 创建 Session（ORT 自动忽略不存在的 EP，不会抛异常）
+        self.session = ort.InferenceSession(onnx_path, providers=providers)
+
+        # 获取真实工作 provider
+        actual_providers = self.session.get_providers()
+
+        print("YOLO Providers:", actual_providers)
+
+        if "CANNExecutionProvider" in actual_providers:
+            print("[INFO] YOLO 使用 CANNExecutionProvider（昇腾）")
+        else:
+            print("[INFO] YOLO 使用 CPUExecutionProvider（非昇腾环境）")
+
+        self.conf_threshold = conf_threshold
+        self.iou_threshold = iou_threshold
+        self.input_name = self.session.get_inputs()[0].name
+        self.preprocess_size_1 = preprocess_size_1
+        self.preprocess_size_2 = preprocess_size_2
+
+        print(f"YOLO模型输入名称: {self.input_name}")
+        print(f"YOLO模型输入形状: {self.session.get_inputs()[0].shape}")
+        print(f"YOLO模型输出形状: {self.session.get_outputs()[0].shape}")
+
+    def preprocess(self, img):
+        self.orig_shape = img.shape[:2]
+        img, self.ratio, (self.dw, self.dh) = letterbox(img, (self.preprocess_size_1, self.preprocess_size_2))
+
+        # ===== 新增：保存letterbox处理后的图像 =====
+        # 确保保存目录存在（如不存在则创建）
+        # save_dir = "../YOLO_Pipe_results"
+        # os.makedirs(save_dir, exist_ok=True)
+        # # 生成唯一文件名（例如按时间戳命名，避免覆盖）
+        # timestamp = int(time.time() * 1000)  # 毫秒级时间戳
+        # save_path = os.path.join(save_dir, f"letterbox_{timestamp}.jpg")
+        # # 注意：letterbox处理后的img是BGR格式（因为输入的img是BGR，letterbox未改变通道顺序）
+        # cv2.imwrite(save_path, img)
+        # print(f"letterbox处理后的图像已保存至：{save_path}")
+        # ==========================================
+
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        img = img.transpose(2, 0, 1).astype(np.float32)
+        img /= 255.0
+        img = np.expand_dims(img, axis=0)  # (1,3,640,640)
+        return img
+
+    def postprocess_v8(self, pred, im0_shape):
+        x = pred[0].T  # (8400, 84)
+
+        boxes = x[:, :4]
+        scores = x[:, 4:]  # 80 classes
+
+        class_ids = np.argmax(scores, axis=1)
+        conf = scores[np.arange(len(scores)), class_ids]
+
+        # 只保留 person（COCO class 0）
+        mask = (class_ids == 0) & (conf > self.conf_threshold)
+        if not mask.any():
+            return []
+
+        boxes = boxes[mask]
+        conf = conf[mask]
+        class_ids = class_ids[mask]
+
+        # xywh → xyxy
+        x1 = boxes[:, 0] - boxes[:, 2] / 2
+        y1 = boxes[:, 1] - boxes[:, 3] / 2
+        x2 = boxes[:, 0] + boxes[:, 2] / 2
+        y2 = boxes[:, 1] + boxes[:, 3] / 2
+
+        # 去 letterbox
+        x1 = (x1 - self.dw) / self.ratio
+        y1 = (y1 - self.dh) / self.ratio
+        x2 = (x2 - self.dw) / self.ratio
+        y2 = (y2 - self.dh) / self.ratio
+
+        x1 = np.clip(x1, 0, im0_shape[1])
+        y1 = np.clip(y1, 0, im0_shape[0])
+        x2 = np.clip(x2, 0, im0_shape[1])
+        y2 = np.clip(y2, 0, im0_shape[0])
+
+        bboxes = np.stack([x1, y1, x2, y2], axis=1)
+
+        indices = cv2.dnn.NMSBoxes(
+            bboxes.tolist(),
+            conf.tolist(),
+            self.conf_threshold,
+            self.iou_threshold
+        )
+
+        results = []
+        if len(indices) > 0:
+            indices = indices.flatten()
+            for i in indices:
+                results.append([
+                    int(bboxes[i, 0]),
+                    int(bboxes[i, 1]),
+                    int(bboxes[i, 2]),
+                    int(bboxes[i, 3]),
+                    float(conf[i]),
+                    0  # person
+                ])
+
+        return results
+
+    def __call__(self, frame):
+        # ===== 前处理计时 =====
+        t_pre_start = time.perf_counter()
+        input_data = self.preprocess(frame)
+        t_pre_end = time.perf_counter()
+
+        # ===== 推理计时 =====
+        t_inf_start = time.perf_counter()
+        pred = self.session.run(None, {self.input_name: input_data})[0]
+        t_inf_end = time.perf_counter()
+
+        # ===== 后处理计时 =====
+        t_post_start = time.perf_counter()
+        results = self.postprocess_v8(pred, frame.shape)
+        t_post_end = time.perf_counter()
+
+        # ===== 打印耗时（毫秒）=====
+        pre_ms = (t_pre_end - t_pre_start) * 1000
+        inf_ms = (t_inf_end - t_inf_start) * 1000
+        post_ms = (t_post_end - t_post_start) * 1000
+        total_ms = pre_ms + inf_ms + post_ms
+
+        # print(
+        #     f"[YOLO] Pre:{pre_ms:6.2f}ms | Inf:{inf_ms:6.2f}ms | Post:{post_ms:6.2f}ms | Total:{total_ms:6.2f}ms | Dets:{len(results)}")
+
+        return results
--- a/src/rtsp_service_ws_1217.py
+++ b/src/rtsp_service_ws_1217.py