Files
SupervisorAI/backup/cuda_t.py
2025-12-20 18:07:49 +08:00

242 lines
8.8 KiB
Python

import torch
import onnxruntime as ort
import insightface
import subprocess
import sys
import os
def detailed_diagnosis():
    """Run a step-by-step CUDA diagnosis and print the findings.

    Sections: system info, PyTorch CUDA status, ONNX Runtime providers,
    InsightFace install, system driver/toolkit (nvidia-smi / nvcc),
    CUDA-related environment variables, installed package versions, and a
    small CPU-vs-GPU InsightFace benchmark. Output only; returns None.
    """
    print("=" * 60)
    print("详细CUDA诊断")
    print("=" * 60)

    # 1. System information
    print("\n📋 1. 系统信息:")
    print(f"Python版本: {sys.version}")
    print(f"Python路径: {sys.executable}")
    print(f"Conda环境: {sys.prefix}")

    # 2. PyTorch details
    print("\n🔥 2. PyTorch详细信息:")
    print(f"PyTorch版本: {torch.__version__}")
    print(f"PyTorch路径: {torch.__file__}")
    print(f"CUDA可用: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"CUDA版本: {torch.version.cuda}")
        print(f"GPU数量: {torch.cuda.device_count()}")
        for i in range(torch.cuda.device_count()):
            print(f" GPU {i}: {torch.cuda.get_device_name(i)}")
            print(f" 内存: {torch.cuda.get_device_properties(i).total_memory / 1024 ** 3:.1f} GB")
    else:
        print("❌ PyTorch无法使用CUDA")
        # Extra probing to help narrow down why CUDA is unavailable.
        print("\n🔍 PyTorch CUDA问题排查:")
        print(f" torch.cuda.is_available(): {torch.cuda.is_available()}")
        try:
            print(f" CUDA设备数量: {torch.cuda.device_count()}")
        except Exception:  # fix: was a bare `except:`
            print(" CUDA设备数量: 无法获取")

    # 3. ONNX Runtime details
    print("\n⚡ 3. ONNX Runtime详细信息:")
    print(f"ONNX Runtime版本: {ort.__version__}")
    print(f"ONNX Runtime路径: {ort.__file__}")
    available_providers = ort.get_available_providers()
    print(f"可用Providers: {available_providers}")
    if 'CUDAExecutionProvider' in available_providers:
        print("✅ CUDAExecutionProvider可用")
        # Smoke-test the CUDA provider by loading a bundled buffalo_l model.
        try:
            options = ort.SessionOptions()
            session = ort.InferenceSession(
                # fix: model file extension was misspelled '.onx' (never loadable)
                os.path.join(os.path.dirname(insightface.__file__), 'models', 'buffalo_l', '1k3d68.onnx'),
                providers=['CUDAExecutionProvider'],
                sess_options=options
            )
            print("✅ CUDAExecutionProvider测试通过")
        except Exception as e:
            print(f"❌ CUDAExecutionProvider测试失败: {e}")
    else:
        print("❌ CUDAExecutionProvider不可用")

    # 4. InsightFace install info
    print("\n👁️ 4. InsightFace信息:")
    print(f"InsightFace版本: {insightface.__version__}")
    print(f"InsightFace路径: {insightface.__file__}")

    # 5. System-level CUDA toolchain
    print("\n🖥️ 5. 系统CUDA检查:")
    try:
        # nvidia-smi reports the installed driver and the max CUDA it supports
        result = subprocess.run(['nvidia-smi'], capture_output=True, text=True, timeout=10)
        if result.returncode == 0:
            print("✅ nvidia-smi可用")
            for line in result.stdout.split('\n'):
                if 'Driver Version' in line:
                    print(f" 驱动版本: {line.strip()}")
                if 'CUDA Version' in line:
                    print(f" CUDA版本: {line.strip()}")
        else:
            print("❌ nvidia-smi不可用")
    except Exception as e:  # fix: (TimeoutExpired, FileNotFoundError, Exception) was redundant
        print(f"❌ nvidia-smi执行失败: {e}")

    try:
        # nvcc shows whether a full CUDA toolkit is on PATH
        result = subprocess.run(['nvcc', '--version'], capture_output=True, text=True, timeout=5)
        if result.returncode == 0:
            print("✅ nvcc可用")
            out_lines = result.stdout.split('\n')
            version_line = out_lines[3] if len(out_lines) > 3 else result.stdout
            print(f" {version_line.strip()}")
        else:
            print("❌ nvcc不可用")
    except Exception as e:  # fix: redundant exception tuple collapsed
        print(f"❌ nvcc执行失败: {e}")

    # 6. Environment variables
    print("\n🌍 6. 环境变量检查:")
    # NOTE(review): cuda_paths is collected but never used afterwards; kept for parity
    cuda_paths = []
    for key, value in os.environ.items():
        if 'CUDA' in key.upper() or 'CUDNN' in key.upper():
            print(f" {key}: {value}")
        if 'PATH' in key or 'HOME' in key:
            cuda_paths.append((key, value))

    # 7. Package version compatibility
    print("\n📦 7. 包版本兼容性检查:")
    try:
        # fix: pkg_resources is deprecated; importlib.metadata is the stdlib replacement
        from importlib.metadata import PackageNotFoundError, version as dist_version
        packages = ['torch', 'torchvision', 'torchaudio', 'onnxruntime', 'insightface', 'opencv-python', 'numpy']
        for pkg in packages:
            try:
                print(f" {pkg}: {dist_version(pkg)}")
            except PackageNotFoundError:  # fix: was a bare `except:`
                print(f" {pkg}: 未安装")
    except Exception:  # fix: was a bare `except:`
        print(" 无法检查包版本")

    # 8. Real performance test: CPU vs GPU InsightFace inference
    print("\n🚀 8. 实际性能测试:")
    try:
        app_cpu = insightface.app.FaceAnalysis(name='buffalo_l')
        app_cpu.prepare(ctx_id=-1)  # ctx_id=-1 forces CPU execution
        app_gpu = insightface.app.FaceAnalysis(name='buffalo_l')
        app_gpu.prepare(ctx_id=0)  # ctx_id=0 selects the first GPU

        import numpy as np
        import time
        # Random noise image is enough to time the pipeline (no faces needed)
        test_img = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)

        print(" CPU测试...")
        start_time = time.time()
        for _ in range(5):
            app_cpu.get(test_img)
        cpu_time = (time.time() - start_time) * 1000 / 5  # average ms per run

        print(" GPU测试...")
        start_time = time.time()
        for _ in range(5):
            app_gpu.get(test_img)
        gpu_time = (time.time() - start_time) * 1000 / 5

        print(f" CPU平均时间: {cpu_time:.1f}ms")
        print(f" GPU平均时间: {gpu_time:.1f}ms")
        # Require a clear (>20%) speed-up before claiming GPU acceleration works
        if gpu_time < cpu_time * 0.8:
            print(" ✅ GPU加速生效")
        else:
            print(" ⚠️ GPU加速未生效或效果不明显")
    except Exception as e:
        print(f" ❌ 性能测试失败: {e}")
def check_package_installation():
    """Print install status for each required package.

    For torch and onnxruntime, additionally report whether GPU/CUDA support
    is present. Output only; returns None.
    """
    print("\n" + "=" * 60)
    print("包安装检查")
    print("=" * 60)

    packages = {
        'torch': 'PyTorch (深度学习框架)',
        'torchvision': 'PyTorch视觉库',
        'torchaudio': 'PyTorch音频库',
        'onnxruntime': 'ONNX Runtime (推理引擎)',
        'insightface': '人脸识别库',
        'opencv-python': 'OpenCV (图像处理)',
        'numpy': '数值计算库'
    }
    # PyPI distribution name -> importable module name, where they differ.
    # fix: __import__('opencv-python') can never succeed (module is cv2),
    # so opencv-python was always reported as not installed.
    import_names = {'opencv-python': 'cv2'}

    for pkg, desc in packages.items():
        try:
            if pkg == 'torch':
                import torch
                version = torch.__version__
                cuda_status = "✅ CUDA可用" if torch.cuda.is_available() else "❌ CUDA不可用"
                print(f"{pkg} ({desc}): {version} {cuda_status}")
            elif pkg == 'onnxruntime':
                import onnxruntime as ort
                version = ort.__version__
                providers = ort.get_available_providers()
                cuda_status = "✅ 有GPU支持" if 'CUDAExecutionProvider' in providers else "❌ 无GPU支持"
                print(f"{pkg} ({desc}): {version} {cuda_status}")
            else:
                module = __import__(import_names.get(pkg, pkg))
                version = getattr(module, '__version__', '未知版本')
                print(f"{pkg} ({desc}): {version}")
        except ImportError:
            print(f"{pkg} ({desc}): ❌ 未安装")
if __name__ == "__main__":
detailed_diagnosis()
check_package_installation()
# 提供解决方案
print("\n" + "=" * 60)
print("解决方案建议")
print("=" * 60)
# 基于诊断结果给出建议
if not torch.cuda.is_available():
print("\n❌ 主要问题: PyTorch没有CUDA支持")
print("💡 解决方案:")
print("1. 完全卸载当前PyTorch:")
print(" pip uninstall torch torchvision torchaudio")
print("2. 安装GPU版本的PyTorch:")
print(" pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118")
if 'CUDAExecutionProvider' not in ort.get_available_providers():
print("\n❌ 主要问题: ONNX Runtime没有GPU支持")
print("💡 解决方案:")
print("1. 卸载CPU版本:")
print(" pip uninstall onnxruntime")
print("2. 安装GPU版本:")
print(" pip install onnxruntime-gpu")
# 通用建议
print("\n🔄 通用建议:")
print("1. 创建全新的conda环境:")
print(" conda create -n face_gpu python=3.10")
print(" conda activate face_gpu")
print("2. 按顺序安装:")
print(" pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118")
print(" pip install onnxruntime-gpu")
print(" pip install insightface opencv-python")
print("3. 验证安装:")
print(
" python -c \"import torch; print(f'PyTorch CUDA: {torch.cuda.is_available()}'); import onnxruntime as ort; print(f'ONNX Providers: {ort.get_available_providers()}')\"")