Files
algorithm/backend/app/services/deployment.py
2026-02-08 14:42:58 +08:00

583 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Deployment service module: Docker container management, image building and automated deployment."""
import os
import sys
import subprocess
import json
import tempfile
import shutil
import time
from typing import Optional, List, Dict, Any
import docker
from sqlalchemy.orm import Session
import logging
from app.models.models import Algorithm, AlgorithmVersion
from app.schemas.algorithm import AlgorithmCreate
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
class DeploymentService:
    """Build Docker images for algorithms and manage their containers.

    Each operation uses the Docker SDK client stored in ``self.client`` and
    falls back to running the ``docker`` CLI through ``subprocess`` when that
    client is ``None`` (i.e. ``docker.from_env()`` raised in ``__init__``).
    """

    # Distribution names that detect_dependencies() is allowed to report
    # for import statements.
    _COMMON_PACKAGES = frozenset({
        'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'torch', 'keras',
        'opencv-python', 'pillow', 'matplotlib', 'seaborn', 'nltk', 'spacy',
        'transformers', 'fastapi', 'flask', 'requests', 'urllib3', 'beautifulsoup4',
        'sqlalchemy', 'pymongo', 'redis', 'psycopg2', 'pymysql', 'onnxruntime',
    })

    # Import names that differ from the pip distribution providing them.
    # Without this mapping these packages could never be detected, because
    # the import name never equals the distribution name.
    _IMPORT_ALIASES = {
        'sklearn': 'scikit-learn',
        'cv2': 'opencv-python',
        'PIL': 'pillow',
        'bs4': 'beautifulsoup4',
    }

    # Requirements every generated image receives. Tuples (not sets) keep
    # the generated Dockerfile identical from one run to the next.
    _BASE_REQUIREMENTS = ('fastapi', 'uvicorn', 'pydantic', 'python-multipart', 'numpy')
    _MODEL_REQUIREMENTS = ('onnxruntime', 'scikit-learn')

    # Name prefix shared by every container this service manages.
    _CONTAINER_PREFIX = 'algorithm-'

    def __init__(self):
        """Create the Docker SDK client, or store ``None`` if that fails."""
        try:
            # Build a client from the environment's Docker settings.
            self.client = docker.from_env()
        except Exception as e:
            logger.error(f"Failed to connect to Docker: {str(e)}")
            self.client = None

    def detect_dependencies(self, code: str) -> List[str]:
        """Detect third-party dependencies referenced by Python source text.

        Only lines that (after stripping) start with ``import ``, ``from ``
        or ``pip install `` are inspected. Imported modules are reported
        only when they map to a known common package; ``pip install``
        arguments are reported as written, skipping option flags.

        Args:
            code: Python source code.

        Returns:
            Sorted list of unique package names.
        """
        found = set()
        for raw_line in code.split('\n'):
            line = raw_line.strip()
            if line.startswith('import '):
                # "import a.b as c, d" -> top-level modules "a" and "d".
                modules = []
                for clause in line[len('import '):].split(','):
                    tokens = clause.split()
                    if tokens:
                        modules.append(tokens[0].split('.')[0])
            elif line.startswith('from '):
                # "from a.b import c" -> "a"; relative imports yield "".
                modules = [line.split()[1].split('.')[0]]
            elif line.startswith('pip install '):
                # One requirement per argument; option flags are not packages.
                found.update(
                    token for token in line.split()[2:]
                    if not token.startswith('-')
                )
                continue
            else:
                continue

            for module in modules:
                package = self._IMPORT_ALIASES.get(module, module)
                if package in self._COMMON_PACKAGES:
                    found.add(package)
        return sorted(found)

    def build_algorithm_image(self, algorithm_name: str, code: str, dependencies: Optional[List[str]] = None) -> Dict[str, Any]:
        """Build a Docker image that serves the given algorithm code.

        A temporary build context is created containing a generated
        Dockerfile, the algorithm source (``algorithm.py``) and a generated
        API service (``app.py``).

        Args:
            algorithm_name: Algorithm name; lower-cased and with spaces
                replaced by ``-`` to form the image name.
            code: Python source of the algorithm.
            dependencies: Package names to install; detected from ``code``
                when ``None``.

        Returns:
            Dict with ``image_name`` (``None`` on failure), ``logs`` (list of
            log lines, ending with the error message on failure) and
            ``success``.
        """
        # Created outside the try block so a failure can still return the
        # log lines gathered up to that point.
        build_logs: List[str] = []
        try:
            build_logs.append(f"开始构建镜像: {algorithm_name}")
            if dependencies is None:
                build_logs.append("自动检测依赖...")
                dependencies = self.detect_dependencies(code)
                build_logs.append(f"检测到依赖: {', '.join(dependencies)}")
            else:
                build_logs.append(f"使用指定依赖: {', '.join(dependencies)}")

            build_logs.append("创建构建环境...")
            with tempfile.TemporaryDirectory() as temp_dir:
                self._write_text(temp_dir, 'Dockerfile', self._generate_dockerfile(dependencies))
                build_logs.append("生成Dockerfile完成")

                self._write_text(temp_dir, 'algorithm.py', code)
                build_logs.append("生成算法代码文件完成")

                self._write_text(temp_dir, 'app.py', self._generate_api_service())
                build_logs.append("生成API服务文件完成")

                image_name = f"algorithm-{algorithm_name.lower().replace(' ', '-')}:latest"
                build_logs.append(f"开始构建镜像: {image_name}")
                logger.info(f"Building Docker image: {image_name}")
                logger.info(f"Dependencies: {dependencies}")

                if self.client:
                    # Build through the Docker SDK and collect its output.
                    _image, logs = self.client.images.build(
                        path=temp_dir,
                        tag=image_name,
                        rm=True,
                    )
                    for log in logs:
                        if 'stream' in log:
                            log_message = log['stream'].strip()
                            if log_message:
                                build_logs.append(log_message)
                                logger.info(log_message)
                else:
                    # Fallback: build through the docker CLI.
                    build_logs.append("使用subprocess构建镜像...")
                    result = subprocess.run(
                        ['docker', 'build', '-t', image_name, temp_dir],
                        capture_output=True,
                        text=True,
                    )
                    if result.returncode != 0:
                        error_message = f"Docker build failed: {result.stderr}"
                        logger.error(error_message)
                        raise RuntimeError(error_message)
                    for line in result.stdout.strip().split('\n'):
                        if line:
                            build_logs.append(line)
                            logger.info(line)

            success_message = f"Successfully built image: {image_name}"
            build_logs.append(success_message)
            logger.info(success_message)
            return {
                'image_name': image_name,
                'logs': build_logs,
                'success': True,
            }
        except Exception as e:
            error_message = f"Failed to build algorithm image: {str(e)}"
            logger.error(error_message)
            build_logs.append(error_message)
            return {
                'image_name': None,
                'logs': build_logs,
                'success': False,
            }

    def deploy_algorithm(self, algorithm_id: str, image_name: str, port: Optional[int] = None) -> Dict[str, Any]:
        """Start a container for an algorithm image.

        Any existing container with the same name is stopped and removed
        first. Container port 8000 is published on ``port``.

        Args:
            algorithm_id: Algorithm ID; the container is named
                ``algorithm-<algorithm_id>``.
            image_name: Image to run.
            port: Host port; a free port is picked when ``None``.

        Returns:
            Dict with ``container_name``, ``container_id``, ``image_name``,
            ``port``, ``status`` and ``api_url``.

        Raises:
            Exception: Any failure is logged and re-raised.
        """
        try:
            if port is None:
                port = self._get_available_port()

            container_name = f"algorithm-{algorithm_id}"
            logger.info(f"Deploying container: {container_name} on port {port}")

            # Replace any previous deployment that used the same name.
            self._stop_and_remove_container(container_name)

            if self.client:
                container = self.client.containers.run(
                    image_name,
                    name=container_name,
                    ports={'8000/tcp': port},
                    detach=True,
                    restart_policy={'Name': 'unless-stopped'},
                )
                container_id = container.id
            else:
                # Fallback: start the container through the docker CLI.
                result = subprocess.run(
                    [
                        'docker', 'run',
                        '--name', container_name,
                        '-p', f'{port}:8000',
                        '-d',
                        '--restart', 'unless-stopped',
                        image_name,
                    ],
                    capture_output=True,
                    text=True,
                )
                if result.returncode != 0:
                    logger.error(f"Failed to run container: {result.stderr}")
                    raise RuntimeError(f"Failed to run container: {result.stderr}")
                container_id = result.stdout.strip()

            # Give the container a moment to start before reading its status.
            time.sleep(2)
            container_status = self.get_container_status(container_name)

            deployment_info = {
                'container_name': container_name,
                'container_id': container_id,
                'image_name': image_name,
                'port': port,
                'status': container_status,
                'api_url': f"http://localhost:{port}",
            }
            logger.info("Successfully deployed algorithm: %s", deployment_info)
            return deployment_info
        except Exception as e:
            logger.error(f"Failed to deploy algorithm: {str(e)}")
            raise

    def get_container_status(self, container_name: str) -> str:
        """Return the status of a container.

        Args:
            container_name: Container name.

        Returns:
            The container status string; ``'unknown'`` when the CLI
            fallback's ``docker inspect`` exits non-zero; ``'error'`` when
            an exception is raised.
        """
        try:
            if self.client:
                return self.client.containers.get(container_name).status

            # Fallback: ask the docker CLI.
            result = subprocess.run(
                ['docker', 'inspect', '--format', '{{.State.Status}}', container_name],
                capture_output=True,
                text=True,
            )
            if result.returncode == 0:
                return result.stdout.strip()
            return 'unknown'
        except Exception as e:
            logger.error(f"Failed to get container status: {str(e)}")
            return 'error'

    def stop_container(self, container_name: str) -> bool:
        """Stop a container.

        Args:
            container_name: Container name.

        Returns:
            ``True`` on success, ``False`` on any failure (which is logged).
        """
        try:
            if self.client:
                self.client.containers.get(container_name).stop()
            else:
                # Fallback: stop through the docker CLI.
                result = subprocess.run(
                    ['docker', 'stop', container_name],
                    capture_output=True,
                    text=True,
                )
                if result.returncode != 0:
                    logger.error(f"Failed to stop container: {result.stderr}")
                    return False
            logger.info(f"Successfully stopped container: {container_name}")
            return True
        except Exception as e:
            logger.error(f"Failed to stop container: {str(e)}")
            return False

    def remove_container(self, container_name: str) -> bool:
        """Forcibly remove a container.

        Args:
            container_name: Container name.

        Returns:
            ``True`` on success, ``False`` on any failure (which is logged).
        """
        try:
            if self.client:
                self.client.containers.get(container_name).remove(force=True)
            else:
                # Fallback: remove through the docker CLI.
                result = subprocess.run(
                    ['docker', 'rm', '-f', container_name],
                    capture_output=True,
                    text=True,
                )
                if result.returncode != 0:
                    logger.error(f"Failed to remove container: {result.stderr}")
                    return False
            logger.info(f"Successfully removed container: {container_name}")
            return True
        except Exception as e:
            logger.error(f"Failed to remove container: {str(e)}")
            return False

    def list_containers(self) -> List[Dict[str, Any]]:
        """List all containers whose name starts with ``algorithm-``.

        Returns:
            List of dicts with ``name``, ``id``, ``image``, ``status``,
            ``ports`` and ``created``. On error the failure is logged and
            the entries collected so far are returned.
        """
        containers: List[Dict[str, Any]] = []
        try:
            if self.client:
                for container in self.client.containers.list(all=True):
                    if container.name.startswith(self._CONTAINER_PREFIX):
                        containers.append({
                            'name': container.name,
                            'id': container.id,
                            'image': container.image.tags[0] if container.image.tags else 'unknown',
                            'status': container.status,
                            'ports': container.ports,
                            'created': container.attrs['Created'],
                        })
            else:
                # Fallback: list through the docker CLI, one JSON object per line.
                result = subprocess.run(
                    ['docker', 'ps', '-a', '--format', '{{json .}}'],
                    capture_output=True,
                    text=True,
                )
                if result.returncode == 0:
                    # Consume lazily so entries parsed before a malformed
                    # line are still returned.
                    for entry in self._iter_cli_containers(result.stdout):
                        containers.append(entry)
        except Exception as e:
            logger.error(f"Failed to list containers: {str(e)}")
        return containers

    @staticmethod
    def _iter_cli_containers(output: str):
        """Yield algorithm containers parsed from ``docker ps`` JSON lines.

        Args:
            output: stdout of ``docker ps -a --format '{{json .}}'``.

        Yields:
            One dict per container whose name starts with ``algorithm-``.
        """
        for line in output.splitlines():
            line = line.strip()
            if not line:
                continue
            info = json.loads(line)
            # The CLI prints names without a leading slash; strip one anyway
            # so slash-prefixed names are matched too.
            name = info['Names'].lstrip('/')
            if not name.startswith(DeploymentService._CONTAINER_PREFIX):
                continue
            yield {
                'name': name,
                'id': info['ID'],
                'image': info['Image'],
                'status': info['Status'],
                'ports': info['Ports'],
                'created': info['CreatedAt'],
            }

    @staticmethod
    def _write_text(directory: str, filename: str, content: str) -> None:
        """Write ``content`` to ``directory/filename`` encoded as UTF-8."""
        # Explicit UTF-8: the generated files contain non-ASCII comments and
        # the platform default encoding may not be able to represent them.
        with open(os.path.join(directory, filename), 'w', encoding='utf-8') as f:
            f.write(content)

    def _generate_dockerfile(self, dependencies: List[str]) -> str:
        """Generate the Dockerfile for an algorithm image.

        The requirements file is created inside the image by a single-line
        ``RUN printf`` instruction, so no ``requirements.txt`` has to exist
        in the build context.

        Args:
            dependencies: Package names to install in addition to the base
                and model requirements.

        Returns:
            Dockerfile content.
        """
        import shlex  # local import keeps this block self-contained

        requested = [dep.strip() for dep in dependencies if dep and dep.strip()]
        # Merge and de-duplicate while preserving order. A dependency that is
        # also a model requirement must be kept, not dropped.
        requirements = list(dict.fromkeys(
            [*self._BASE_REQUIREMENTS, *self._MODEL_REQUIREMENTS, *requested]
        ))
        quoted = ' '.join(shlex.quote(req) for req in requirements)

        # Every instruction stays on one physical line: a newline inside a
        # RUN command would make Docker parse the next line as a new
        # (unknown) instruction.
        lines = [
            'FROM python:3.9-slim',
            'WORKDIR /app',
            '# 安装系统依赖',
            'RUN apt-get update && apt-get install -y gcc python3-dev && rm -rf /var/lib/apt/lists/*',
            '# 安装Python依赖',
            "RUN printf '%s\\n' " + quoted + ' > requirements.txt',
            'RUN pip install --no-cache-dir -r requirements.txt',
            '# 复制应用代码',
            'COPY . .',
            '# 暴露端口',
            'EXPOSE 8000',
            '# 启动服务',
            'CMD ["python", "app.py"]',
        ]
        return '\n'.join(lines) + '\n'

    def _generate_api_service(self) -> str:
        """Generate the source of the FastAPI service that wraps the algorithm.

        The generated program loads ``algorithm.py`` from its working
        directory, exposes ``POST /execute``, ``GET /health`` and ``GET /``,
        and serves on port 8000.

        Returns:
            Python source code of the API service.
        """
        import textwrap  # local import keeps this block self-contained

        # dedent() removes the indentation this literal has only because it
        # sits inside a method; the generated module starts at column 0.
        return textwrap.dedent("""
            from fastapi import FastAPI, HTTPException
            from pydantic import BaseModel
            import uvicorn
            import importlib.util
            import sys
            import os
            # 导入算法模块
            spec = importlib.util.spec_from_file_location("algorithm", "algorithm.py")
            algorithm = importlib.util.module_from_spec(spec)
            sys.modules["algorithm"] = algorithm
            spec.loader.exec_module(algorithm)
            app = FastAPI()
            class AlgorithmInput(BaseModel):
                input_data: dict
                params: dict = {}
            class AlgorithmOutput(BaseModel):
                success: bool
                result: dict
                error: str = ""
            @app.post("/execute", response_model=AlgorithmOutput)
            def execute_algorithm(input_data: AlgorithmInput):
                # 执行算法
                try:
                    # 调用算法的execute函数
                    if hasattr(algorithm, 'execute'):
                        result = algorithm.execute(input_data.input_data, input_data.params)
                        return AlgorithmOutput(
                            success=True,
                            result=result
                        )
                    else:
                        raise Exception("Algorithm module does not have execute function")
                except Exception as e:
                    return AlgorithmOutput(
                        success=False,
                        result={},
                        error=str(e)
                    )
            @app.get("/health")
            def health_check():
                # 健康检查
                return {"status": "healthy"}
            @app.get("/")
            def root():
                # 根路径
                return {"message": "Algorithm API Service"}
            if __name__ == "__main__":
                uvicorn.run(app, host="0.0.0.0", port=8000)
            """)

    def _get_available_port(self) -> int:
        """Return a TCP port that was free at the time of the call.

        The port is found by binding a socket to port 0; the socket is
        closed before returning.

        Returns:
            Port number.
        """
        import socket

        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(('', 0))
            return s.getsockname()[1]

    def _stop_and_remove_container(self, container_name: str):
        """Stop and remove a container if it exists (best effort).

        A missing container is not an error; any other failure is logged
        and swallowed.

        Args:
            container_name: Container name.
        """
        try:
            if self.client:
                try:
                    container = self.client.containers.get(container_name)
                    container.stop()
                    container.remove()
                except docker.errors.NotFound:
                    # Nothing to clean up.
                    pass
            else:
                # Fallback: docker CLI; exit codes are deliberately ignored.
                subprocess.run(['docker', 'stop', container_name], capture_output=True)
                subprocess.run(['docker', 'rm', '-f', container_name], capture_output=True)
        except Exception as e:
            logger.error(f"Failed to stop and remove container: {str(e)}")
# Global deployment service instance, created when this module is imported.
deployment_service = DeploymentService()