# final_gpu_test.py
import torch
import onnxruntime as ort
import insightface
import cv2
import numpy as np
import time
import os
from typing import List, Dict


def comprehensive_gpu_test():
    """Run an end-to-end smoke test and benchmark of the face-recognition stack.

    The steps run in order:

    1. Print library versions and whether CUDA is visible to PyTorch and
       ONNX Runtime.
    2. Write synthetic images (random noise plus white squares) to
       ``test_data/``.
    3. Build an InsightFace ``buffalo_l`` pipeline prepared with ``ctx_id=0``.
    4. Extract a target embedding and compare it with every face detected
       in a second image.
    5. Process five more images and report the average latency.
    6. Time ten inferences on the existing pipeline and on a second one
       prepared with ``ctx_id=-1``.

    Everything is reported on stdout and annotated images are written to
    ``test_data/``. The function returns ``None``.

    NOTE(review): the synthetic images are only noise with white squares, so
    the detector may well report zero faces; the steps that depend on a
    detection print a notice instead of silently doing nothing.
    """
    print("=" * 60)
    print("最终GPU加速测试")
    print("=" * 60)

    # 1. Environment check
    cuda_ok = torch.cuda.is_available()
    ort_cuda_ok = 'CUDAExecutionProvider' in ort.get_available_providers()

    print("1. 环境验证:")
    print(f"✅ PyTorch: {torch.__version__}")
    print(f"✅ PyTorch CUDA: {cuda_ok}")
    if cuda_ok:
        print(f"✅ GPU设备: {torch.cuda.get_device_name(0)}")
    else:
        # Querying the device name without a CUDA device raises, which would
        # abort the diagnostic exactly when it is most needed.
        print("⚠️ GPU设备: 未检测到可用的CUDA设备")
    print(f"✅ ONNX Runtime GPU支持: {ort_cuda_ok}")
    print(f"✅ OpenCV: {cv2.__version__}")
    print(f"✅ InsightFace: {insightface.__version__}")

    # 2. Create the test data
    print("\n2. 创建测试数据...")
    os.makedirs("test_data", exist_ok=True)

    # Target "face": noise with a white square as a crude stand-in.
    # randint's upper bound is exclusive, so 256 covers the full uint8 range.
    target_face = np.random.randint(0, 256, (400, 400, 3), dtype=np.uint8)
    cv2.rectangle(target_face, (150, 150), (250, 250), (255, 255, 255), -1)
    cv2.imwrite("test_data/target_face.jpg", target_face)

    # Multi-face test image. NumPy image shape is (height, width, channels);
    # 800 px of width is needed for the right-most square (x up to 600) to
    # fit, and it matches the batch images created in step 5.
    test_image = np.random.randint(0, 256, (600, 800, 3), dtype=np.uint8)
    cv2.rectangle(test_image, (100, 100), (200, 200), (255, 255, 255), -1)
    cv2.rectangle(test_image, (300, 150), (400, 250), (255, 255, 255), -1)
    cv2.rectangle(test_image, (500, 200), (600, 300), (255, 255, 255), -1)
    cv2.imwrite("test_data/multi_face_test.jpg", test_image)

    print("✅ 测试数据创建完成")

    # 3. Face-recognition system
    print("\n3. 初始化GPU人脸识别系统...")

    class FastGPUFaceRecognition:
        """Thin wrapper around ``insightface.app.FaceAnalysis`` for this test."""

        # Cosine similarity above which a detected face counts as a match.
        MATCH_THRESHOLD = 0.6

        def __init__(self):
            """Load the ``buffalo_l`` model pack and prepare it with ``ctx_id=0``."""
            self.app = insightface.app.FaceAnalysis(name='buffalo_l')
            self.app.prepare(
                ctx_id=0,  # GPU 0
                det_thresh=0.3,  # detection threshold
                det_size=(640, 640)
            )
            # Raw embedding of the reference face; None until one is set.
            self.target_embedding = None
            print("✅ GPU人脸识别系统初始化完成")

        @staticmethod
        def _unit(vec):
            """Return *vec* scaled to unit L2 norm (unchanged if the norm is 0)."""
            norm = np.linalg.norm(vec)
            return vec / norm if norm > 0 else vec

        def set_target_face(self, image_path: str) -> bool:
            """Store the embedding of the first face found in *image_path*.

            Returns ``True`` on success and ``False`` when the image cannot
            be read or contains no detected face.
            """
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals a missing/unreadable file with None.
                return False

            faces = self.app.get(img)
            if not faces:
                return False

            self.target_embedding = faces[0].embedding
            print("✅ 目标人脸特征提取完成")
            return True

        def process_image(self, image_path: str, output_path: str):
            """Detect faces in *image_path*, annotate them and save the image.

            Returns ``None`` when the image cannot be read. Otherwise returns
            a dict with ``faces_detected`` (int), ``processing_time_ms``
            (float, covering read, inference, drawing and write) and
            ``results``: one dict per face with ``face_index``, ``bbox``,
            ``similarity`` (cosine similarity to the target, ``0.0`` when no
            target is set) and ``det_score``.
            """
            start_time = time.perf_counter()

            img = cv2.imread(image_path)
            if img is None:
                return None

            faces = self.app.get(img)

            # Normalise the target once; it does not change per face.
            target_unit = None
            if self.target_embedding is not None:
                target_unit = self._unit(self.target_embedding)

            results = []
            for i, face in enumerate(faces):
                similarity = 0.0
                if target_unit is not None:
                    similarity = float(np.dot(self._unit(face.embedding), target_unit))

                # Plain Python ints for both the result dict and OpenCV.
                bbox = face.bbox.astype(int).tolist()
                x1, y1, x2, y2 = bbox

                results.append({
                    'face_index': i,
                    'bbox': bbox,
                    'similarity': similarity,
                    # Converted to float, consistent with 'similarity'.
                    'det_score': float(face.det_score)
                })

                # Green box for a match, red otherwise.
                color = (0, 255, 0) if similarity > self.MATCH_THRESHOLD else (0, 0, 255)
                cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
                # Keep the label on the canvas for faces touching the top edge.
                label_y = max(y1 - 10, 15)
                cv2.putText(img, f"{similarity:.3f}", (x1, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

            if not cv2.imwrite(output_path, img):
                print(f"⚠️ 结果图像保存失败: {output_path}")

            processing_time = (time.perf_counter() - start_time) * 1000
            return {
                'faces_detected': len(faces),
                'processing_time_ms': processing_time,
                'results': results
            }

    def _mean_inference_ms(app, image, runs=10):
        """Return the mean latency in ms of ``app.get(image)`` over *runs* calls."""
        # One untimed call so that one-time initialisation is not measured.
        app.get(image)
        timings = []
        for _ in range(runs):
            start = time.perf_counter()
            app.get(image)
            timings.append((time.perf_counter() - start) * 1000)
        return float(np.mean(timings))

    system = FastGPUFaceRecognition()

    # 4. Single-image test
    print("\n4. 性能测试:")

    if system.set_target_face("test_data/target_face.jpg"):
        result = system.process_image("test_data/multi_face_test.jpg", "test_data/gpu_result.jpg")

        if result is not None:
            print("✅ 处理完成:")
            print(f"   检测到人脸数: {result['faces_detected']}")
            print(f"   处理时间: {result['processing_time_ms']:.1f}ms")

            for face in result['results']:
                matched = face['similarity'] > FastGPUFaceRecognition.MATCH_THRESHOLD
                status = "匹配" if matched else "不匹配"
                print(f"   人脸 {face['face_index'] + 1}: 相似度 {face['similarity']:.3f} ({status})")
        else:
            print("⚠️ 无法读取测试图像，跳过单图测试")
    else:
        # Report the skip instead of leaving this section silently empty.
        print("⚠️ 未能从目标图像提取人脸特征，跳过单图测试")

    # 5. Batch test
    print("\n5. 批量性能测试:")

    test_images = []
    for i in range(5):
        test_img = np.random.randint(0, 256, (600, 800, 3), dtype=np.uint8)
        cv2.rectangle(test_img, (100, 100), (200, 200), (255, 255, 255), -1)
        img_path = f"test_data/batch_test_{i}.jpg"
        cv2.imwrite(img_path, test_img)
        test_images.append(img_path)

    total_time = 0.0
    total_faces = 0
    processed = 0

    for i, img_path in enumerate(test_images):
        result = system.process_image(img_path, f"test_data/batch_result_{i}.jpg")
        if result is None:
            print(f"   图像 {i + 1}: 读取失败，已跳过")
            continue
        processed += 1
        total_time += result['processing_time_ms']
        total_faces += result['faces_detected']
        print(f"   图像 {i + 1}: {result['processing_time_ms']:.1f}ms, {result['faces_detected']}张人脸")

    if processed:
        # Average over images actually processed, not over those attempted.
        avg_time = total_time / processed
        print(f"   平均处理时间: {avg_time:.1f}ms/张")
        print(f"   总检测人脸: {total_faces}张")

    # 6. Optional GPU vs CPU comparison; a failure here only skips this step.
    print("\n6. GPU vs CPU 性能对比:")
    try:
        test_img = cv2.imread("test_data/multi_face_test.jpg")
        if test_img is None:
            raise FileNotFoundError("test_data/multi_face_test.jpg")

        avg_gpu = _mean_inference_ms(system.app, test_img)

        # Separate pipeline prepared with ctx_id=-1 (CPU).
        cpu_app = insightface.app.FaceAnalysis(name='buffalo_l')
        cpu_app.prepare(ctx_id=-1)
        avg_cpu = _mean_inference_ms(cpu_app, test_img)

        print(f"   GPU平均推理时间: {avg_gpu:.1f}ms")
        print(f"   CPU平均推理时间: {avg_cpu:.1f}ms")
        if avg_gpu > 0:
            print(f"   GPU加速比: {avg_cpu / avg_gpu:.1f}x")

    except Exception as e:
        print(f"   性能对比测试跳过: {e}")

    print("\n" + "=" * 60)
    print("🎉 GPU加速测试完成！")
    if ort_cuda_ok:
        print("✅ 现在您的人脸识别系统正在使用GPU加速")
    else:
        # Do not claim GPU acceleration when ONNX Runtime exposes no CUDA provider.
        print("⚠️ ONNX Runtime 未提供 CUDAExecutionProvider，本次推理可能运行在CPU上")
    print("📁 结果图像保存在 test_data/ 目录中")
    print("=" * 60)


# Run the full test only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    comprehensive_gpu_test()