# final_gpu_test.py
"""End-to-end smoke test and micro-benchmark for GPU face recognition.

Running this file checks the local PyTorch / ONNX Runtime / InsightFace
installation, writes synthetic test images to ``test_data/``, runs the
InsightFace ``buffalo_l`` pipeline on them and compares GPU and CPU timings.
"""
import os
import time
from typing import Dict, List, Optional

import torch
import onnxruntime as ort
import insightface
import cv2
import numpy as np

# Cosine similarity above which a detected face is reported as a match.
MATCH_THRESHOLD = 0.6


def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity of two vectors (0.0 if either is all-zero)."""
    denom = float(np.linalg.norm(a)) * float(np.linalg.norm(b))
    if denom == 0.0:
        # Avoid a division by zero / NaN for a degenerate embedding.
        return 0.0
    return float(np.dot(a, b) / denom)


def _write_noise_image(path: str, height: int, width: int, boxes: List[tuple]) -> None:
    """Write a random-noise BGR image with filled white rectangles to *path*.

    Each entry of *boxes* is a ``((x1, y1), (x2, y2))`` pair handed to
    ``cv2.rectangle``; the rectangles are crude stand-ins for faces.
    """
    img = np.random.randint(0, 255, (height, width, 3), dtype=np.uint8)
    for top_left, bottom_right in boxes:
        cv2.rectangle(img, top_left, bottom_right, (255, 255, 255), -1)
    cv2.imwrite(path, img)


def _mean_inference_ms(app, img, runs: int = 10) -> float:
    """Return the mean wall-clock time in ms of ``app.get(img)`` over *runs* calls."""
    # One untimed call first so one-off session start-up cost is not measured.
    app.get(img)
    samples = []
    for _ in range(runs):
        start = time.perf_counter()
        app.get(img)
        samples.append((time.perf_counter() - start) * 1000)
    return float(np.mean(samples))


class FastGPUFaceRecognition:
    """Face detector/recogniser built on InsightFace ``buffalo_l``, prepared with ctx_id=0."""

    def __init__(self):
        self.app = insightface.app.FaceAnalysis(name='buffalo_l')
        self.app.prepare(
            ctx_id=0,  # GPU 0
            det_thresh=0.3,  # detection threshold
            det_size=(640, 640)
        )
        # Embedding of the reference face; None until set_target_face succeeds.
        self.target_embedding = None
        print("✅ GPU人脸识别系统初始化完成")

    def set_target_face(self, image_path: str) -> bool:
        """Store the embedding of the first face found in *image_path*.

        Returns True on success, False when the image cannot be read or no
        face is detected in it.
        """
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            return False
        faces = self.app.get(img)
        if not faces:
            return False
        self.target_embedding = faces[0].embedding
        print("✅ 目标人脸特征提取完成")
        return True

    def process_image(self, image_path: str, output_path: str) -> Optional[Dict]:
        """Detect faces in *image_path*, annotate them and save to *output_path*.

        Returns None when the image cannot be read; otherwise a dict with
        ``faces_detected``, ``processing_time_ms`` and ``results`` (one entry
        per face: ``face_index``, ``bbox``, ``similarity``, ``det_score``).
        Similarity is 0.0 for every face when no target face has been set.
        """
        start_time = time.perf_counter()
        img = cv2.imread(image_path)
        if img is None:
            return None

        # Inference
        faces = self.app.get(img)

        results = []
        for i, face in enumerate(faces):
            similarity = 0.0
            if self.target_embedding is not None:
                similarity = _cosine_similarity(face.embedding, self.target_embedding)

            bbox = face.bbox.astype(int)
            results.append({
                'face_index': i,
                'bbox': bbox.tolist(),
                'similarity': similarity,
                # Plain float so the result dict holds no numpy scalars.
                'det_score': float(face.det_score)
            })

            # Draw the box (green = match, red = no match) and the score.
            color = (0, 255, 0) if similarity > MATCH_THRESHOLD else (0, 0, 255)
            cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
            cv2.putText(img, f"{similarity:.3f}", (bbox[0], bbox[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

        # Save the annotated image.
        cv2.imwrite(output_path, img)
        processing_time = (time.perf_counter() - start_time) * 1000

        return {
            'faces_detected': len(faces),
            'processing_time_ms': processing_time,
            'results': results
        }


def comprehensive_gpu_test():
    """Run the full GPU test: environment check, data creation, inference, benchmarks."""
    print("=" * 60)
    print("最终GPU加速测试")
    print("=" * 60)

    # 1. Environment check
    cuda_available = torch.cuda.is_available()
    ort_cuda = 'CUDAExecutionProvider' in ort.get_available_providers()
    print("1. 环境验证:")
    print(f"✅ PyTorch: {torch.__version__}")
    print(f"✅ PyTorch CUDA: {cuda_available}")
    if cuda_available:
        print(f"✅ GPU设备: {torch.cuda.get_device_name(0)}")
    else:
        # get_device_name(0) raises without a CUDA device, so do not call it.
        print("⚠️ GPU设备: 未检测到可用的CUDA设备")
    print(f"✅ ONNX Runtime GPU支持: {ort_cuda}")
    print(f"✅ OpenCV: {cv2.__version__}")
    print(f"✅ InsightFace: {insightface.__version__}")

    # 2. Create test data
    print("\n2. 创建测试数据...")
    os.makedirs("test_data", exist_ok=True)

    # Target face: noise with one white square as a crude face stand-in.
    _write_noise_image("test_data/target_face.jpg", 400, 400,
                       [((150, 150), (250, 250))])

    # Test image with several "face" regions.
    _write_noise_image("test_data/multi_face_test.jpg", 800, 600, [
        ((100, 100), (200, 200)),
        ((300, 150), (400, 250)),
        ((500, 200), (600, 300)),
    ])
    print("✅ 测试数据创建完成")

    # 3. Face recognition system
    print("\n3. 初始化GPU人脸识别系统...")
    system = FastGPUFaceRecognition()

    # 4. Single-image test
    print("\n4. 性能测试:")
    if system.set_target_face("test_data/target_face.jpg"):
        result = system.process_image("test_data/multi_face_test.jpg",
                                      "test_data/gpu_result.jpg")
        if result:
            print("✅ 处理完成:")
            print(f" 检测到人脸数: {result['faces_detected']}")
            print(f" 处理时间: {result['processing_time_ms']:.1f}ms")
            for face in result['results']:
                status = "匹配" if face['similarity'] > MATCH_THRESHOLD else "不匹配"
                print(f" 人脸 {face['face_index'] + 1}: 相似度 {face['similarity']:.3f} ({status})")
        else:
            print("⚠️ 测试图像读取失败")
    else:
        # Report the failure instead of silently skipping the whole step.
        print("⚠️ 未能从目标图像中提取人脸特征, 跳过相似度测试")

    # 5. Batch test
    print("\n5. 批量性能测试:")
    test_images = []
    for i in range(5):
        img_path = f"test_data/batch_test_{i}.jpg"
        _write_noise_image(img_path, 600, 800, [((100, 100), (200, 200))])
        test_images.append(img_path)

    total_time = 0
    total_faces = 0
    processed = 0
    for i, img_path in enumerate(test_images):
        result = system.process_image(img_path, f"test_data/batch_result_{i}.jpg")
        if result:
            processed += 1
            total_time += result['processing_time_ms']
            total_faces += result['faces_detected']
            print(f" 图像 {i + 1}: {result['processing_time_ms']:.1f}ms, {result['faces_detected']}张人脸")

    # Average over the images that were actually processed.
    if processed > 0:
        avg_time = total_time / processed
        print(f" 平均处理时间: {avg_time:.1f}ms/张")
        print(f" 总检测人脸: {total_faces}张")

    # 6. Optional GPU vs CPU comparison (best effort: any failure just skips it)
    print("\n6. GPU vs CPU 性能对比:")
    try:
        test_img = cv2.imread("test_data/multi_face_test.jpg")
        if test_img is None:
            raise RuntimeError("无法读取 test_data/multi_face_test.jpg")

        avg_gpu = _mean_inference_ms(system.app, test_img)

        # Separate instance prepared with ctx_id=-1 (CPU).
        cpu_app = insightface.app.FaceAnalysis(name='buffalo_l')
        cpu_app.prepare(ctx_id=-1)
        avg_cpu = _mean_inference_ms(cpu_app, test_img)

        print(f" GPU平均推理时间: {avg_gpu:.1f}ms")
        print(f" CPU平均推理时间: {avg_cpu:.1f}ms")
        if avg_gpu > 0:
            print(f" GPU加速比: {avg_cpu / avg_gpu:.1f}x")
    except Exception as e:
        print(f" 性能对比测试跳过: {e}")

    print("\n" + "=" * 60)
    print("🎉 GPU加速测试完成!")
    # Only claim GPU acceleration when the environment check found CUDA support.
    # NOTE(review): this is a heuristic; it does not inspect the providers the
    # InsightFace sessions actually selected.
    if cuda_available and ort_cuda:
        print("✅ 现在您的人脸识别系统正在使用GPU加速")
    else:
        print("⚠️ 未检测到可用的GPU加速, 推理可能运行在CPU上")
    print("📁 结果图像保存在 test_data/ 目录中")
    print("=" * 60)


if __name__ == "__main__":
    comprehensive_gpu_test()