"""部署服务模块,负责Docker容器管理、镜像构建和自动部署""" import os import sys import subprocess import json import tempfile import shutil import time from typing import Optional, List, Dict, Any import docker from sqlalchemy.orm import Session import logging from app.models.models import Algorithm, AlgorithmVersion from app.schemas.algorithm import AlgorithmCreate logger = logging.getLogger(__name__) class DeploymentService: """部署服务类,负责Docker容器管理和自动部署""" def __init__(self): try: # 连接Docker守护进程 self.client = docker.from_env() except Exception as e: logger.error(f"Failed to connect to Docker: {str(e)}") self.client = None def detect_dependencies(self, code: str) -> List[str]: """ 自动检测Python代码中的依赖 Args: code: Python代码字符串 Returns: 依赖包列表 """ dependencies = [] # 常见的导入语句检测 import_patterns = [ 'import ', 'from ', 'pip install ' ] # 常见的Python包 common_packages = { 'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'torch', 'keras', 'opencv-python', 'pillow', 'matplotlib', 'seaborn', 'nltk', 'spacy', 'transformers', 'fastapi', 'flask', 'requests', 'urllib3', 'beautifulsoup4', 'sqlalchemy', 'pymongo', 'redis', 'psycopg2', 'pymysql', 'onnxruntime' } for line in code.split('\n'): line = line.strip() for pattern in import_patterns: if line.startswith(pattern): # 提取包名 if pattern == 'import ': parts = line.split()[1].split('.') if parts[0] in common_packages: dependencies.append(parts[0]) elif pattern == 'from ': parts = line.split()[1].split('.') if parts[0] in common_packages: dependencies.append(parts[0]) elif pattern == 'pip install ': package = line.split(' ', 2)[2] dependencies.append(package) # 去重 return list(set(dependencies)) def build_algorithm_image(self, algorithm_name: str, code: str, dependencies: Optional[List[str]] = None) -> Dict[str, Any]: """ 构建算法Docker镜像 Args: algorithm_name: 算法名称 code: Python代码 dependencies: 依赖包列表(如果为None则自动检测) Returns: 包含镜像名称和构建日志的字典 """ try: build_logs = [] # 自动检测依赖 build_logs.append(f"开始构建镜像: {algorithm_name}") if dependencies is None: build_logs.append("自动检测依赖...") dependencies = self.detect_dependencies(code) build_logs.append(f"检测到依赖: {', '.join(dependencies)}") else: build_logs.append(f"使用指定依赖: {', '.join(dependencies)}") # 创建临时目录 build_logs.append("创建构建环境...") with tempfile.TemporaryDirectory() as temp_dir: # 生成Dockerfile dockerfile_content = self._generate_dockerfile(dependencies) dockerfile_path = os.path.join(temp_dir, 'Dockerfile') with open(dockerfile_path, 'w') as f: f.write(dockerfile_content) build_logs.append("生成Dockerfile完成") # 生成算法代码文件 algorithm_path = os.path.join(temp_dir, 'algorithm.py') with open(algorithm_path, 'w') as f: f.write(code) build_logs.append("生成算法代码文件完成") # 生成API服务文件 api_path = os.path.join(temp_dir, 'app.py') with open(api_path, 'w') as f: f.write(self._generate_api_service()) build_logs.append("生成API服务文件完成") # 构建镜像 image_name = f"algorithm-{algorithm_name.lower().replace(' ', '-')}:latest" build_logs.append(f"开始构建镜像: {image_name}") logger.info(f"Building Docker image: {image_name}") logger.info(f"Dependencies: {dependencies}") # 使用Docker SDK构建镜像 if self.client: image, logs = self.client.images.build( path=temp_dir, tag=image_name, rm=True ) # 打印构建日志 for log in logs: if 'stream' in log: log_message = log['stream'].strip() if log_message: build_logs.append(log_message) logger.info(log_message) else: # 备用方案:使用subprocess build_logs.append("使用subprocess构建镜像...") result = subprocess.run( ['docker', 'build', '-t', image_name, temp_dir], capture_output=True, text=True ) if result.returncode != 0: error_message = f"Docker build failed: {result.stderr}" build_logs.append(error_message) logger.error(error_message) raise Exception(error_message) for line in result.stdout.strip().split('\n'): if line: build_logs.append(line) logger.info(line) success_message = f"Successfully built image: {image_name}" build_logs.append(success_message) logger.info(success_message) return { 'image_name': image_name, 'logs': build_logs, 'success': True } except Exception as e: error_message = f"Failed to build algorithm image: {str(e)}" logger.error(error_message) return { 'image_name': None, 'logs': [error_message], 'success': False } def deploy_algorithm(self, algorithm_id: str, image_name: str, port: int = None) -> Dict[str, Any]: """ 部署算法容器 Args: algorithm_id: 算法ID image_name: 镜像名称 port: 端口号(如果为None则自动分配) Returns: 部署信息 """ try: # 自动分配端口 if port is None: port = self._get_available_port() # 容器名称 container_name = f"algorithm-{algorithm_id}" logger.info(f"Deploying container: {container_name} on port {port}") # 停止并移除同名容器 self._stop_and_remove_container(container_name) # 启动容器 if self.client: container = self.client.containers.run( image_name, name=container_name, ports={'8000/tcp': port}, detach=True, restart_policy={'Name': 'unless-stopped'} ) container_id = container.id else: # 备用方案:使用subprocess result = subprocess.run( [ 'docker', 'run', '--name', container_name, '-p', f'{port}:8000', '-d', '--restart', 'unless-stopped', image_name ], capture_output=True, text=True ) if result.returncode != 0: logger.error(f"Failed to run container: {result.stderr}") raise Exception(f"Failed to run container: {result.stderr}") container_id = result.stdout.strip() # 等待容器启动 time.sleep(2) # 验证容器状态 container_status = self.get_container_status(container_name) deployment_info = { 'container_name': container_name, 'container_id': container_id, 'image_name': image_name, 'port': port, 'status': container_status, 'api_url': f"http://localhost:{port}" } logger.info(f"Successfully deployed algorithm: {deployment_info}") return deployment_info except Exception as e: logger.error(f"Failed to deploy algorithm: {str(e)}") raise def get_container_status(self, container_name: str) -> str: """ 获取容器状态 Args: container_name: 容器名称 Returns: 容器状态 """ try: if self.client: container = self.client.containers.get(container_name) return container.status else: # 备用方案:使用subprocess result = subprocess.run( ['docker', 'inspect', '--format', '{{.State.Status}}', container_name], capture_output=True, text=True ) if result.returncode == 0: return result.stdout.strip() else: return 'unknown' except Exception as e: logger.error(f"Failed to get container status: {str(e)}") return 'error' def stop_container(self, container_name: str) -> bool: """ 停止容器 Args: container_name: 容器名称 Returns: 是否成功 """ try: if self.client: container = self.client.containers.get(container_name) container.stop() else: # 备用方案:使用subprocess result = subprocess.run( ['docker', 'stop', container_name], capture_output=True, text=True ) if result.returncode != 0: logger.error(f"Failed to stop container: {result.stderr}") return False logger.info(f"Successfully stopped container: {container_name}") return True except Exception as e: logger.error(f"Failed to stop container: {str(e)}") return False def remove_container(self, container_name: str) -> bool: """ 移除容器 Args: container_name: 容器名称 Returns: 是否成功 """ try: if self.client: container = self.client.containers.get(container_name) container.remove(force=True) else: # 备用方案:使用subprocess result = subprocess.run( ['docker', 'rm', '-f', container_name], capture_output=True, text=True ) if result.returncode != 0: logger.error(f"Failed to remove container: {result.stderr}") return False logger.info(f"Successfully removed container: {container_name}") return True except Exception as e: logger.error(f"Failed to remove container: {str(e)}") return False def list_containers(self) -> List[Dict[str, Any]]: """ 列出所有算法容器 Returns: 容器列表 """ containers = [] try: if self.client: for container in self.client.containers.list(all=True): if container.name.startswith('algorithm-'): containers.append({ 'name': container.name, 'id': container.id, 'image': container.image.tags[0] if container.image.tags else 'unknown', 'status': container.status, 'ports': container.ports, 'created': container.attrs['Created'] }) else: # 备用方案:使用subprocess result = subprocess.run( ['docker', 'ps', '-a', '--format', '{{json .}}'], capture_output=True, text=True ) if result.returncode == 0: for line in result.stdout.strip().split('\n'): if line: container_info = json.loads(line) if container_info['Names'].startswith('/algorithm-'): containers.append({ 'name': container_info['Names'].lstrip('/'), 'id': container_info['ID'], 'image': container_info['Image'], 'status': container_info['Status'], 'ports': container_info['Ports'], 'created': container_info['CreatedAt'] }) except Exception as e: logger.error(f"Failed to list containers: {str(e)}") return containers def _generate_dockerfile(self, dependencies: List[str]) -> str: """ 生成Dockerfile Args: dependencies: 依赖包列表 Returns: Dockerfile内容 """ dockerfile = """ FROM python:3.9-slim WORKDIR /app # 安装系统依赖 RUN apt-get update && apt-get install -y \ gcc \ python3-dev \ && rm -rf /var/lib/apt/lists/* # 安装Python依赖 COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt # 复制应用代码 COPY . . # 暴露端口 EXPOSE 8000 # 启动服务 CMD ["python", "app.py"] """ # 生成requirements.txt内容 requirements = """ fastapi uvicorn pydantic python-multipart numpy """ # 为模型部署添加必要的依赖 model_deps = {'onnxruntime', 'scikit-learn'} for dep in model_deps: if dep not in dependencies: requirements += f"{dep}\n" for dep in dependencies: if dep not in model_deps: requirements += f"{dep}\n" # 将requirements.txt内容添加到Dockerfile dockerfile = dockerfile.replace('COPY requirements.txt .', f"RUN echo '{requirements}' > requirements.txt") return dockerfile def _generate_api_service(self) -> str: """ 生成API服务代码 Returns: API服务代码 """ api_code = """ from fastapi import FastAPI, HTTPException from pydantic import BaseModel import uvicorn import importlib.util import sys import os # 导入算法模块 spec = importlib.util.spec_from_file_location("algorithm", "algorithm.py") algorithm = importlib.util.module_from_spec(spec) sys.modules["algorithm"] = algorithm spec.loader.exec_module(algorithm) app = FastAPI() class AlgorithmInput(BaseModel): input_data: dict params: dict = {} class AlgorithmOutput(BaseModel): success: bool result: dict error: str = "" @app.post("/execute", response_model=AlgorithmOutput) def execute_algorithm(input_data: AlgorithmInput): # 执行算法 try: # 调用算法的execute函数 if hasattr(algorithm, 'execute'): result = algorithm.execute(input_data.input_data, input_data.params) return AlgorithmOutput( success=True, result=result ) else: raise Exception("Algorithm module does not have execute function") except Exception as e: return AlgorithmOutput( success=False, result={}, error=str(e) ) @app.get("/health") def health_check(): # 健康检查 return {"status": "healthy"} @app.get("/") def root(): # 根路径 return {"message": "Algorithm API Service"} if __name__ == "__main__": uvicorn.run(app, host="0.0.0.0", port=8000) """ return api_code def _get_available_port(self) -> int: """ 获取可用端口 Returns: 可用端口号 """ import socket with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.bind(('', 0)) return s.getsockname()[1] def _stop_and_remove_container(self, container_name: str): """ 停止并移除容器 Args: container_name: 容器名称 """ try: if self.client: # 尝试获取容器 try: container = self.client.containers.get(container_name) # 停止容器 container.stop() # 移除容器 container.remove() except docker.errors.NotFound: pass else: # 备用方案:使用subprocess # 停止容器 subprocess.run(['docker', 'stop', container_name], capture_output=True) # 移除容器 subprocess.run(['docker', 'rm', '-f', container_name], capture_output=True) except Exception as e: logger.error(f"Failed to stop and remove container: {str(e)}") # 全局部署服务实例 deployment_service = DeploymentService()