"""部署服务模块,负责Docker容器管理、镜像构建和自动部署"""
import json
import logging
import os
import re
import shutil
import subprocess
import sys
import tempfile
import textwrap
import time
from typing import Optional, List, Dict, Any

import docker
from sqlalchemy.orm import Session

from app.models.models import Algorithm, AlgorithmVersion
from app.schemas.algorithm import AlgorithmCreate
|
||
|
||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class DeploymentService:
    """Build Docker images for algorithms and manage the containers running them.

    Each operation uses the Docker SDK client created in ``__init__`` when it
    is available and otherwise shells out to the ``docker`` executable.
    """

    # Import name (first dotted component) -> pip package that provides it.
    # Several packages are imported under a name that differs from the name
    # they are installed by, so comparing the two directly misses them.
    _IMPORT_TO_PACKAGE = {
        'numpy': 'numpy',
        'pandas': 'pandas',
        'sklearn': 'scikit-learn',
        'tensorflow': 'tensorflow',
        'torch': 'torch',
        'keras': 'keras',
        'cv2': 'opencv-python',
        'PIL': 'pillow',
        'matplotlib': 'matplotlib',
        'seaborn': 'seaborn',
        'nltk': 'nltk',
        'spacy': 'spacy',
        'transformers': 'transformers',
        'fastapi': 'fastapi',
        'flask': 'flask',
        'requests': 'requests',
        'urllib3': 'urllib3',
        'bs4': 'beautifulsoup4',
        'sqlalchemy': 'sqlalchemy',
        'pymongo': 'pymongo',
        'redis': 'redis',
        'psycopg2': 'psycopg2',
        'pymysql': 'pymysql',
        'onnxruntime': 'onnxruntime',
    }

    # Packages installed into every image, regardless of the algorithm.
    _BASE_REQUIREMENTS = ('fastapi', 'uvicorn', 'pydantic', 'python-multipart', 'numpy')

    # Model-serving packages that are also installed into every image.
    _MODEL_REQUIREMENTS = ('onnxruntime', 'scikit-learn')

    def __init__(self):
        """Create the Docker SDK client, falling back to CLI mode on failure.

        Any exception raised while creating the client is logged and leaves
        ``self.client`` set to ``None``; the other methods then use the
        ``docker`` command-line tool instead.
        """
        try:
            self.client = docker.from_env()
        except Exception as e:
            logger.error(f"Failed to connect to Docker: {str(e)}")
            self.client = None

    def detect_dependencies(self, code: str) -> List[str]:
        """Guess the pip packages needed by a piece of Python source.

        The scan is purely line based. Three kinds of lines are recognised:

        * ``import a, b.c as d`` - every listed module is looked up;
        * ``from a.b import c`` - the source module is looked up;
        * ``pip install x y`` - every token that is not an option
          (does not start with ``-``) is taken as a requirement as written.

        Imports are only reported when their top-level name is a key of
        ``_IMPORT_TO_PACKAGE``; the reported value is the pip package name.

        Args:
            code: Python source text.

        Returns:
            Sorted list of unique package names.
        """
        found = set()

        for raw_line in code.splitlines():
            line = raw_line.strip()

            if line.startswith('pip install '):
                found.update(
                    token for token in line.split()[2:]
                    if not token.startswith('-')
                )
                continue

            # Ignore a trailing comment so it is not parsed as a module list.
            statement = line.split('#', 1)[0]
            if statement.startswith('import '):
                modules = [
                    clause.split()[0]
                    for clause in statement[len('import '):].split(',')
                    if clause.strip()
                ]
            elif statement.startswith('from '):
                modules = statement.split()[1:2]
            else:
                continue

            for module in modules:
                # A relative import yields an empty root, which is not a key.
                package = self._IMPORT_TO_PACKAGE.get(module.split('.')[0])
                if package:
                    found.add(package)

        # Sorted so the result does not depend on set iteration order.
        return sorted(found)

    def build_algorithm_image(self, algorithm_name: str, code: str, dependencies: Optional[List[str]] = None) -> Dict[str, Any]:
        """Build a Docker image that serves ``code`` through the generated API.

        A temporary build context is filled with a Dockerfile, a
        ``requirements.txt``, the algorithm source (``algorithm.py``) and the
        generated API service (``app.py``), then built with the SDK client or,
        when no client is available, with ``docker build``.

        Args:
            algorithm_name: Name used to derive the image name.
            code: Python source of the algorithm.
            dependencies: Packages to install; detected from ``code`` when
                ``None``.

        Returns:
            Dict with ``image_name`` (``None`` on failure), ``logs`` (list of
            progress lines; on failure the last entry is the error) and
            ``success``. Exceptions are not propagated.
        """
        # Created outside the try block so that a failure can still report
        # the progress made before the error.
        build_logs: List[str] = []

        try:
            build_logs.append(f"开始构建镜像: {algorithm_name}")
            if dependencies is None:
                build_logs.append("自动检测依赖...")
                dependencies = self.detect_dependencies(code)
                build_logs.append(f"检测到依赖: {', '.join(dependencies)}")
            else:
                build_logs.append(f"使用指定依赖: {', '.join(dependencies)}")

            build_logs.append("创建构建环境...")
            with tempfile.TemporaryDirectory() as temp_dir:
                self._write_text(
                    os.path.join(temp_dir, 'Dockerfile'),
                    self._generate_dockerfile(dependencies),
                )
                # The Dockerfile copies this file in before running pip.
                self._write_text(
                    os.path.join(temp_dir, 'requirements.txt'),
                    self._generate_requirements(dependencies),
                )
                build_logs.append("生成Dockerfile完成")

                self._write_text(os.path.join(temp_dir, 'algorithm.py'), code)
                build_logs.append("生成算法代码文件完成")

                self._write_text(
                    os.path.join(temp_dir, 'app.py'),
                    self._generate_api_service(),
                )
                build_logs.append("生成API服务文件完成")

                image_name = self._image_name_for(algorithm_name)
                build_logs.append(f"开始构建镜像: {image_name}")

                logger.info(f"Building Docker image: {image_name}")
                logger.info(f"Dependencies: {dependencies}")

                if self.client:
                    _image, sdk_logs = self.client.images.build(
                        path=temp_dir,
                        tag=image_name,
                        rm=True
                    )

                    for entry in sdk_logs:
                        if 'stream' in entry:
                            log_message = entry['stream'].strip()
                            if log_message:
                                build_logs.append(log_message)
                                logger.info(log_message)
                else:
                    build_logs.append("使用subprocess构建镜像...")
                    result = subprocess.run(
                        ['docker', 'build', '-t', image_name, temp_dir],
                        capture_output=True,
                        text=True
                    )

                    if result.returncode != 0:
                        error_message = f"Docker build failed: {result.stderr}"
                        build_logs.append(error_message)
                        logger.error(error_message)
                        raise Exception(error_message)

                    for line in result.stdout.strip().split('\n'):
                        if line:
                            build_logs.append(line)
                            logger.info(line)

                success_message = f"Successfully built image: {image_name}"
                build_logs.append(success_message)
                logger.info(success_message)

                return {
                    'image_name': image_name,
                    'logs': build_logs,
                    'success': True
                }

        except Exception as e:
            error_message = f"Failed to build algorithm image: {str(e)}"
            logger.error(error_message)
            return {
                'image_name': None,
                'logs': build_logs + [error_message],
                'success': False
            }

    def deploy_algorithm(self, algorithm_id: str, image_name: str, port: Optional[int] = None) -> Dict[str, Any]:
        """Start a container named ``algorithm-<algorithm_id>`` from an image.

        Any existing container with the same name is stopped and removed
        first. Container port 8000 is published on ``port``.

        Args:
            algorithm_id: Identifier used to build the container name.
            image_name: Image to run.
            port: Host port to publish on; a free port is picked when ``None``.

        Returns:
            Dict with ``container_name``, ``container_id``, ``image_name``,
            ``port``, ``status`` and ``api_url``.

        Raises:
            Exception: Any failure is logged and re-raised.
        """
        try:
            if port is None:
                port = self._get_available_port()

            container_name = f"algorithm-{algorithm_id}"

            logger.info(f"Deploying container: {container_name} on port {port}")

            self._stop_and_remove_container(container_name)

            if self.client:
                container = self.client.containers.run(
                    image_name,
                    name=container_name,
                    ports={'8000/tcp': port},
                    detach=True,
                    restart_policy={'Name': 'unless-stopped'}
                )
                container_id = container.id
            else:
                result = subprocess.run(
                    [
                        'docker', 'run',
                        '--name', container_name,
                        '-p', f'{port}:8000',
                        '-d',
                        '--restart', 'unless-stopped',
                        image_name
                    ],
                    capture_output=True,
                    text=True
                )

                if result.returncode != 0:
                    logger.error(f"Failed to run container: {result.stderr}")
                    raise Exception(f"Failed to run container: {result.stderr}")

                container_id = result.stdout.strip()

            # Fixed pause before the status is read back.
            time.sleep(2)

            container_status = self.get_container_status(container_name)

            deployment_info = {
                'container_name': container_name,
                'container_id': container_id,
                'image_name': image_name,
                'port': port,
                'status': container_status,
                'api_url': f"http://localhost:{port}"
            }

            logger.info(f"Successfully deployed algorithm: {deployment_info}")
            return deployment_info

        except Exception as e:
            logger.error(f"Failed to deploy algorithm: {str(e)}")
            raise

    def get_container_status(self, container_name: str) -> str:
        """Return the status of a container.

        Args:
            container_name: Name of the container to look up.

        Returns:
            The status reported by Docker; ``'unknown'`` when the CLI
            ``docker inspect`` call exits non-zero; ``'error'`` when any
            exception is raised (the exception is logged, not propagated).
        """
        try:
            if self.client:
                container = self.client.containers.get(container_name)
                return container.status

            result = subprocess.run(
                ['docker', 'inspect', '--format', '{{.State.Status}}', container_name],
                capture_output=True,
                text=True
            )
            if result.returncode == 0:
                return result.stdout.strip()
            return 'unknown'

        except Exception as e:
            logger.error(f"Failed to get container status: {str(e)}")
            return 'error'

    def stop_container(self, container_name: str) -> bool:
        """Stop a container.

        Args:
            container_name: Name of the container to stop.

        Returns:
            ``True`` on success, ``False`` on any failure (which is logged).
        """
        try:
            if self.client:
                container = self.client.containers.get(container_name)
                container.stop()
            else:
                result = subprocess.run(
                    ['docker', 'stop', container_name],
                    capture_output=True,
                    text=True
                )

                if result.returncode != 0:
                    logger.error(f"Failed to stop container: {result.stderr}")
                    return False

            logger.info(f"Successfully stopped container: {container_name}")
            return True

        except Exception as e:
            logger.error(f"Failed to stop container: {str(e)}")
            return False

    def remove_container(self, container_name: str) -> bool:
        """Force-remove a container.

        Args:
            container_name: Name of the container to remove.

        Returns:
            ``True`` on success, ``False`` on any failure (which is logged).
        """
        try:
            if self.client:
                container = self.client.containers.get(container_name)
                container.remove(force=True)
            else:
                result = subprocess.run(
                    ['docker', 'rm', '-f', container_name],
                    capture_output=True,
                    text=True
                )

                if result.returncode != 0:
                    logger.error(f"Failed to remove container: {result.stderr}")
                    return False

            logger.info(f"Successfully removed container: {container_name}")
            return True

        except Exception as e:
            logger.error(f"Failed to remove container: {str(e)}")
            return False

    def list_containers(self) -> List[Dict[str, Any]]:
        """List every container whose name starts with ``algorithm-``.

        Returns:
            List of dicts with ``name``, ``id``, ``image``, ``status``,
            ``ports`` and ``created``. On error the exception is logged and
            the entries collected so far are returned.
        """
        containers = []

        try:
            if self.client:
                for container in self.client.containers.list(all=True):
                    if container.name.startswith('algorithm-'):
                        containers.append({
                            'name': container.name,
                            'id': container.id,
                            'image': container.image.tags[0] if container.image.tags else 'unknown',
                            'status': container.status,
                            'ports': container.ports,
                            'created': container.attrs['Created']
                        })
            else:
                result = subprocess.run(
                    ['docker', 'ps', '-a', '--format', '{{json .}}'],
                    capture_output=True,
                    text=True
                )

                if result.returncode == 0:
                    for line in result.stdout.splitlines():
                        entry = self._container_from_ps_line(line)
                        if entry is not None:
                            containers.append(entry)

        except Exception as e:
            logger.error(f"Failed to list containers: {str(e)}")

        return containers

    @staticmethod
    def _container_from_ps_line(line: str) -> Optional[Dict[str, Any]]:
        """Turn one ``docker ps --format '{{json .}}'`` line into a container dict.

        Args:
            line: One line of CLI output (a JSON object), possibly blank.

        Returns:
            The container dict, or ``None`` for a blank line or a container
            whose name does not start with ``algorithm-``.

        Raises:
            ValueError: If the line is not valid JSON.
            KeyError: If an expected field is missing.
        """
        if not line.strip():
            return None

        info = json.loads(line)
        # Accept the name with or without a leading slash, so the prefix
        # test matches either spelling.
        name = info['Names'].lstrip('/')
        if not name.startswith('algorithm-'):
            return None

        return {
            'name': name,
            'id': info['ID'],
            'image': info['Image'],
            'status': info['Status'],
            'ports': info['Ports'],
            'created': info['CreatedAt']
        }

    @staticmethod
    def _write_text(path: str, content: str) -> None:
        """Write ``content`` to ``path`` as UTF-8.

        The encoding is explicit because the generated files contain
        non-ASCII text and must not depend on the platform default.
        """
        with open(path, 'w', encoding='utf-8') as f:
            f.write(content)

    @staticmethod
    def _image_name_for(algorithm_name: str) -> str:
        """Derive the image name ``algorithm-<slug>:latest`` from a name.

        The name is lower-cased and every character outside
        ``a-z 0-9 _ . -`` (including spaces) is replaced by ``-``, one for
        one. Leading ``.``/``_`` and trailing ``-``/``.``/``_`` are dropped.

        Raises:
            ValueError: If nothing usable is left of the name.
        """
        slug = re.sub(r'[^a-z0-9_.-]', '-', algorithm_name.lower())
        slug = slug.lstrip('._').rstrip('-._')
        if not slug:
            raise ValueError(
                f"Cannot derive a Docker image name from {algorithm_name!r}"
            )
        return f"algorithm-{slug}:latest"

    def _generate_requirements(self, dependencies: List[str]) -> str:
        """Build the ``requirements.txt`` content for an image.

        The base packages come first, then the model-serving packages, then
        the caller's dependencies. Each entry appears once, so a dependency
        that is also a base or model package is kept rather than lost.

        Args:
            dependencies: Extra requirement lines; blank entries are skipped.

        Returns:
            One requirement per line, newline terminated.
        """
        ordered = list(self._BASE_REQUIREMENTS) + list(self._MODEL_REQUIREMENTS)
        ordered.extend(dep.strip() for dep in dependencies)
        # dict.fromkeys removes duplicates while keeping first-seen order.
        unique = dict.fromkeys(dep for dep in ordered if dep)
        return "\n".join(unique) + "\n"

    def _generate_dockerfile(self, dependencies: List[str]) -> str:
        """Return the Dockerfile used to build an algorithm image.

        The Dockerfile copies ``requirements.txt`` from the build context, so
        that file must be written next to it (see ``_generate_requirements``).
        Requirements are no longer embedded in a ``RUN echo`` instruction: a
        Dockerfile instruction ends at the end of its line, so a multi-line
        list cannot be embedded that way.

        Args:
            dependencies: Unused; kept so the signature stays unchanged.

        Returns:
            Dockerfile content.
        """
        return textwrap.dedent("""
            FROM python:3.9-slim

            WORKDIR /app

            # 安装系统依赖
            RUN apt-get update && apt-get install -y gcc python3-dev && rm -rf /var/lib/apt/lists/*

            # 安装Python依赖
            COPY requirements.txt .
            RUN pip install --no-cache-dir -r requirements.txt

            # 复制应用代码
            COPY . .

            # 暴露端口
            EXPOSE 8000

            # 启动服务
            CMD ["python", "app.py"]
        """)

    def _generate_api_service(self) -> str:
        """Return the source of the FastAPI service written to ``app.py``.

        The service loads ``algorithm.py`` as module ``algorithm`` and exposes
        ``POST /execute`` (calls ``algorithm.execute(input_data, params)``),
        ``GET /health`` and ``GET /``, listening on port 8000.

        Returns:
            Python source text, dedented so it starts at column 0.
        """
        return textwrap.dedent("""
            from fastapi import FastAPI, HTTPException
            from pydantic import BaseModel
            import uvicorn
            import importlib.util
            import sys
            import os

            # 导入算法模块
            spec = importlib.util.spec_from_file_location("algorithm", "algorithm.py")
            algorithm = importlib.util.module_from_spec(spec)
            sys.modules["algorithm"] = algorithm
            spec.loader.exec_module(algorithm)

            app = FastAPI()

            class AlgorithmInput(BaseModel):
                input_data: dict
                params: dict = {}

            class AlgorithmOutput(BaseModel):
                success: bool
                result: dict
                error: str = ""

            @app.post("/execute", response_model=AlgorithmOutput)
            def execute_algorithm(input_data: AlgorithmInput):
                # 执行算法
                try:
                    # 调用算法的execute函数
                    if hasattr(algorithm, 'execute'):
                        result = algorithm.execute(input_data.input_data, input_data.params)
                        return AlgorithmOutput(
                            success=True,
                            result=result
                        )
                    else:
                        raise Exception("Algorithm module does not have execute function")

                except Exception as e:
                    return AlgorithmOutput(
                        success=False,
                        result={},
                        error=str(e)
                    )

            @app.get("/health")
            def health_check():
                # 健康检查
                return {"status": "healthy"}

            @app.get("/")
            def root():
                # 根路径
                return {"message": "Algorithm API Service"}

            if __name__ == "__main__":
                uvicorn.run(app, host="0.0.0.0", port=8000)
        """)

    def _get_available_port(self) -> int:
        """Return a port number that was free when this method ran.

        Binding to port 0 lets the operating system pick the port. The socket
        is closed before returning, so the port is not reserved and another
        process may take it before the container is started.

        Returns:
            The port number chosen by the operating system.
        """
        import socket

        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(('', 0))
            return s.getsockname()[1]

    def _stop_and_remove_container(self, container_name: str):
        """Stop and remove a container if it exists (best effort).

        A missing container is not an error. Any other failure is logged and
        swallowed so that the caller can carry on.

        Args:
            container_name: Name of the container to clear away.
        """
        try:
            if self.client:
                try:
                    container = self.client.containers.get(container_name)
                    container.stop()
                    # Forced, as in remove_container.
                    container.remove(force=True)
                except docker.errors.NotFound:
                    pass
            else:
                # Exit codes are ignored: the container may not exist.
                subprocess.run(['docker', 'stop', container_name], capture_output=True)
                subprocess.run(['docker', 'rm', '-f', container_name], capture_output=True)

        except Exception as e:
            logger.error(f"Failed to stop and remove container: {str(e)}")
|
||
|
||
|
||
# 全局部署服务实例
|
||
# Shared module-level instance; constructing it runs DeploymentService.__init__ at import time.
deployment_service = DeploymentService()
|