Files
algorithm/backend/app/gitea/service.py
2026-02-08 14:42:58 +08:00

1254 lines
58 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Gitea服务处理与Gitea相关的业务逻辑"""
import os
import subprocess
import logging
import base64
import re
from typing import Optional, Dict, Any, List
import uuid
from app.gitea.client import GiteaClient
from app.config.settings import settings
from app.models.database import SessionLocal
from app.models.models import GiteaConfig
logger = logging.getLogger(__name__)
class GiteaService:
"""Gitea服务类"""
def __init__(self):
"""初始化Gitea服务"""
self.config = self._load_config()
self.client = None
if self.config:
self.client = GiteaClient(
self.config['server_url'],
self.config['access_token']
)
def _load_config(self) -> Optional[Dict[str, Any]]:
"""加载Gitea配置
Returns:
Gitea配置信息
"""
try:
db = SessionLocal()
# 从数据库中获取配置(只取第一个配置)
config = db.query(GiteaConfig).filter_by(status="active").first()
db.close()
if config:
return {
'id': config.id,
'server_url': config.server_url,
'access_token': config.access_token,
'default_owner': config.default_owner,
'repo_prefix': config.repo_prefix,
'status': config.status
}
# 配置不存在时返回默认值
return {
'server_url': getattr(settings, 'GITEA_SERVER_URL', ''),
'access_token': getattr(settings, 'GITEA_ACCESS_TOKEN', ''),
'default_owner': getattr(settings, 'GITEA_DEFAULT_OWNER', ''),
'repo_prefix': getattr(settings, 'GITEA_REPO_PREFIX', '')
}
except Exception as e:
logger.error(f"Failed to load Gitea config from database: {str(e)}")
# 出错时返回默认配置
return {
'server_url': getattr(settings, 'GITEA_SERVER_URL', ''),
'access_token': getattr(settings, 'GITEA_ACCESS_TOKEN', ''),
'default_owner': getattr(settings, 'GITEA_DEFAULT_OWNER', ''),
'repo_prefix': getattr(settings, 'GITEA_REPO_PREFIX', '')
}
def save_config(self, config: Dict[str, Any]) -> bool:
"""保存Gitea配置
Args:
config: Gitea配置信息
Returns:
是否保存成功
"""
try:
db = SessionLocal()
# 将所有现有配置设置为非活动状态
db.query(GiteaConfig).update({GiteaConfig.status: "inactive"})
# 检查是否已有配置
existing_config = db.query(GiteaConfig).first()
if existing_config:
# 更新现有配置
existing_config.server_url = config['server_url']
existing_config.access_token = config['access_token']
existing_config.default_owner = config['default_owner']
existing_config.repo_prefix = config.get('repo_prefix', '')
existing_config.status = "active"
else:
# 创建新配置
new_config = GiteaConfig(
id=f"gitea-config-{uuid.uuid4()}",
server_url=config['server_url'],
access_token=config['access_token'],
default_owner=config['default_owner'],
repo_prefix=config.get('repo_prefix', ''),
status="active"
)
db.add(new_config)
db.commit()
db.close()
# 更新内存中的配置
self.config = config
self.client = GiteaClient(
config['server_url'],
config['access_token']
)
logger.info("Gitea config saved to database successfully")
return True
except Exception as e:
logger.error(f"Failed to save Gitea config to database: {str(e)}")
return False
def get_config(self) -> Optional[Dict[str, Any]]:
"""获取Gitea配置
Returns:
Gitea配置信息
"""
return self.config
def test_connection(self) -> bool:
"""测试Gitea连接
Returns:
是否连接成功
"""
if not self.client:
return False
return self.client.check_connection()
def create_repository(self, algorithm_id: str, algorithm_name: str, description: str = "") -> Optional[Dict[str, Any]]:
"""为算法创建Gitea仓库
Args:
algorithm_id: 算法ID
algorithm_name: 算法名称
description: 仓库描述
Returns:
创建的仓库信息
"""
try:
if not self.client:
logger.error("Gitea client not initialized. Please check your Gitea configuration.")
return None
if not self.config.get('default_owner'):
logger.error("Default owner not set in Gitea configuration.")
return None
# 记录传入的algorithm_id
logger.info(f"Received algorithm_id: {algorithm_id}")
# 检查是否已经包含前缀
repo_prefix = self.config.get('repo_prefix', '')
if repo_prefix and algorithm_id.startswith(repo_prefix):
logger.info(f"Algorithm ID already contains prefix: {repo_prefix}")
repo_name = algorithm_id
else:
# 生成仓库名称,添加前缀
repo_name = f"{repo_prefix}{algorithm_id}" if repo_prefix else algorithm_id
logger.info(f"Generated repository name: {repo_name}")
logger.info(f"Creating repository: {repo_name} for owner: {self.config['default_owner']}")
# 创建仓库
repo = self.client.create_repository(
self.config['default_owner'],
repo_name,
description or f"Algorithm repository for {algorithm_name}",
False
)
if repo:
logger.info(f"Repository created successfully: {repo}")
# 验证仓库是否真的存在
verify_repo = self.client.get_repository(self.config['default_owner'], repo_name)
if not verify_repo:
logger.error(f"Repository creation verified failed: {repo_name}")
return None
else:
logger.error(f"Failed to create repository: {repo_name}")
return repo
except Exception as e:
logger.error(f"Failed to create repository: {str(e)}")
return None
    def clone_repository(self, repo_url: str, algorithm_id: str, branch: str = "main") -> bool:
        """Clone a Gitea repository into ``/tmp/algorithms/<algorithm_id>``.

        When the plain clone fails (e.g. the remote is empty or the branch
        does not exist yet), falls back to initialising a fresh repository in
        place, committing a seed README and pushing it to the remote.

        Args:
            repo_url: URL of the repository to clone.
            algorithm_id: Algorithm identifier; determines the local directory.
            branch: Branch to clone / push (defaults to "main").

        Returns:
            True when the directory ends up holding a usable repository
            (a failed push after successful local init still counts as True).
        """
        try:
            # Fixed per-algorithm working directory under /tmp.
            repo_dir = f"/tmp/algorithms/{algorithm_id}"
            logger.info(f"Cloning repository to: {repo_dir}")
            # Imports kept function-local, matching the file's style.
            import shutil
            import subprocess
            # Remove any stale checkout before cloning.
            if os.path.exists(repo_dir):
                logger.info(f"Cleaning existing repository directory: {repo_dir}")
                try:
                    shutil.rmtree(repo_dir)
                    logger.info(f"Successfully cleaned directory: {repo_dir}")
                except Exception as e:
                    logger.error(f"Failed to clean directory: {str(e)}")
                    # Fall back to sudo for permission problems.
                    # NOTE(review): requires passwordless sudo in the runtime
                    # environment — confirm this is intended and safe.
                    try:
                        subprocess.run(["sudo", "rm", "-rf", repo_dir], check=True)
                        logger.info(f"Successfully cleaned directory with sudo: {repo_dir}")
                    except Exception as e2:
                        logger.error(f"Failed to clean directory with sudo: {str(e2)}")
                        return False
            # Recreate the (now empty) directory.
            logger.info(f"Creating directory: {repo_dir}")
            os.makedirs(repo_dir, exist_ok=True)
            logger.info(f"Directory created successfully: {repo_dir}")
            # First attempt: a plain clone of the requested branch.
            cmd = ["git", "clone", "-b", branch, repo_url, repo_dir]
            logger.info(f"Running clone command: {' '.join(cmd)}")
            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode == 0:
                logger.info(f"Repository cloned successfully: {repo_url}")
                return True
            else:
                logger.error(f"Failed to clone repository: {result.stderr}")
                # Fallback: initialise an empty repo and push an initial commit.
                logger.info(f"Trying to initialize repository in {repo_dir}")
                init_result = subprocess.run(["git", "init"], cwd=repo_dir, capture_output=True, text=True)
                if init_result.returncode != 0:
                    logger.error(f"Failed to initialize git repository: {init_result.stderr}")
                    return False
                # Wire up "origin"; if it already exists, repoint it instead.
                remote_result = subprocess.run(["git", "remote", "add", "origin", repo_url], cwd=repo_dir, capture_output=True, text=True)
                if remote_result.returncode != 0:
                    logger.error(f"Failed to add remote repository: {remote_result.stderr}")
                    logger.info("Trying to update existing remote repository")
                    update_result = subprocess.run(["git", "remote", "set-url", "origin", repo_url], cwd=repo_dir, capture_output=True, text=True)
                    if update_result.returncode != 0:
                        logger.error(f"Failed to update remote repository: {update_result.stderr}")
                        return False
                    logger.info("Successfully updated remote repository")
                # Seed the repository so there is something to commit.
                readme_path = os.path.join(repo_dir, "README.md")
                with open(readme_path, "w") as f:
                    f.write("# Algorithm Repository\n\nThis is an algorithm repository.\n")
                add_result = subprocess.run(["git", "add", "README.md"], cwd=repo_dir, capture_output=True, text=True)
                if add_result.returncode != 0:
                    logger.error(f"Failed to add README.md: {add_result.stderr}")
                    return False
                commit_result = subprocess.run(["git", "commit", "-m", "Initial commit"], cwd=repo_dir, capture_output=True, text=True)
                if commit_result.returncode != 0:
                    logger.error(f"Failed to commit initial file: {commit_result.stderr}")
                    return False
                # Push the seed commit; a push failure is tolerated because
                # the local repository is still usable afterwards.
                push_result = subprocess.run(["git", "push", "-u", "origin", branch], cwd=repo_dir, capture_output=True, text=True)
                if push_result.returncode != 0:
                    logger.error(f"Failed to push initial commit: {push_result.stderr}")
                    logger.info(f"Repository initialized successfully, but push failed: {push_result.stderr}")
                    return True
                logger.info(f"Repository initialized and pushed successfully: {repo_url}")
                return True
        except Exception as e:
            logger.error(f"Failed to clone repository: {str(e)}")
            return False
    def push_to_repository(self, algorithm_id: str, message: str = "Update code") -> bool:
        """Commit everything under ``/tmp/algorithms/<algorithm_id>`` and push
        it to the matching Gitea repository.

        Handles first-time ``git init`` + remote wiring, remote-URL repair,
        size checks, large-repo git tuning, branch auto-detection, and a chain
        of fallbacks (alternate branch, SSH, staged push) when the push fails.

        Args:
            algorithm_id: Algorithm identifier (may already carry the
                configured repository prefix).
            message: Commit message.

        Returns:
            True on success (including "nothing to commit"), False otherwise.
        """
        try:
            logger.info("=== 开始推送代码到Gitea仓库 ===")
            logger.info(f"Algorithm ID: {algorithm_id}")
            logger.info(f"Commit message: {message}")
            repo_dir = f"/tmp/algorithms/{algorithm_id}"
            logger.info(f"Repository directory: {repo_dir}")
            if not os.path.exists(repo_dir):
                logger.error(f"❌ Repository directory not found: {repo_dir}")
                return False
            # Derive the repository name exactly as create_repository does,
            # so both methods target the same remote.
            repo_prefix = self.config.get("repo_prefix", "")
            if repo_prefix and algorithm_id.startswith(repo_prefix):
                repo_name = algorithm_id
                logger.info(f"Algorithm ID already contains prefix: {repo_prefix}")
            else:
                repo_name = f"{repo_prefix}{algorithm_id}" if repo_prefix else algorithm_id
                logger.info(f"Generated repository name: {repo_name}")
            import subprocess
            # First-time setup: turn the directory into a git repo if needed.
            git_dir = os.path.join(repo_dir, ".git")
            if not os.path.exists(git_dir):
                logger.info(f"⚠️ Git repository not initialized, initializing...")
                logger.info(f"Executing: git init in {repo_dir}")
                init_result = subprocess.run(["git", "init"], cwd=repo_dir, capture_output=True, text=True)
                logger.info(f"Git init output: {init_result.stdout}")
                if init_result.stderr:
                    logger.warning(f"Git init stderr: {init_result.stderr}")
                if init_result.returncode != 0:
                    logger.error(f"❌ Failed to initialize git repository: {init_result.stderr}")
                    return False
                logger.info("✅ Git repository initialized successfully")
                # Wire up "origin" with the access token embedded in the URL.
                # NOTE(review): the token-bearing URL is written to the log on
                # the next line — secret leakage; consider redacting.
                if self.config.get('default_owner'):
                    auth_repo_url = f"https://{self.config['access_token']}@{self.config['server_url'].replace('https://', '').replace('http://', '')}/{self.config['default_owner']}/{repo_name}.git"
                    logger.info(f"Adding remote repository: {auth_repo_url}")
                    remote_result = subprocess.run(["git", "remote", "add", "origin", auth_repo_url], cwd=repo_dir, capture_output=True, text=True)
                    logger.info(f"Git remote add output: {remote_result.stdout}")
                    if remote_result.stderr:
                        logger.warning(f"Git remote add stderr: {remote_result.stderr}")
                    if remote_result.returncode != 0:
                        logger.error(f"❌ Failed to add remote repository: {remote_result.stderr}")
                        return False
                    logger.info("✅ Remote repository added successfully")
            else:
                logger.info("✅ Git repository already initialized")
                # Existing repo: repair the remote URL if it drifted from the
                # configured server/owner/name (token part is ignored in the
                # comparison).
                if self.config.get('default_owner'):
                    auth_repo_url = f"https://{self.config['access_token']}@{self.config['server_url'].replace('https://', '').replace('http://', '')}/{self.config['default_owner']}/{repo_name}.git"
                    logger.info(f"Checking remote URL...")
                    get_url_result = subprocess.run(["git", "remote", "get-url", "origin"], cwd=repo_dir, capture_output=True, text=True)
                    if get_url_result.returncode == 0:
                        current_url = get_url_result.stdout.strip()
                        expected_url = auth_repo_url
                        # Strip the "token@" part before comparing.
                        current_url_without_token = current_url.split('@')[-1] if '@' in current_url else current_url
                        expected_url_without_token = expected_url.split('@')[-1] if '@' in expected_url else expected_url
                        if current_url_without_token != expected_url_without_token:
                            logger.info(f"Updating remote URL from {current_url} to {expected_url}")
                            set_url_result = subprocess.run(["git", "remote", "set-url", "origin", auth_repo_url], cwd=repo_dir, capture_output=True, text=True)
                            if set_url_result.returncode != 0:
                                logger.warning(f"Failed to update remote URL: {set_url_result.stderr}")
                            else:
                                logger.info("✅ Remote URL updated successfully")
            # Stage everything in the working tree.
            logger.info("=== 执行Git操作 ===")
            logger.info("Adding all files with git add .")
            add_result = subprocess.run(["git", "add", "."], cwd=repo_dir, capture_output=True, text=True)
            if add_result.returncode != 0:
                logger.error(f"❌ Git add failed: {add_result.stderr}")
                return False
            if add_result.stderr:
                logger.warning(f"Git add warning: {add_result.stderr}")
            logger.info("✅ Git add completed successfully")
            # Only commit/push when the working tree actually changed.
            logger.info("Executing: git status --porcelain")
            status_result = subprocess.run(["git", "status", "--porcelain"], cwd=repo_dir, capture_output=True, text=True)
            logger.info(f"Git status output: {status_result.stdout}")
            if status_result.stderr:
                logger.warning(f"Git status stderr: {status_result.stderr}")
            if status_result.returncode != 0:
                logger.error(f"❌ Git status failed: {status_result.stderr}")
                return False
            if status_result.stdout.strip():
                logger.info("✅ Changes detected, proceeding with commit and push")
                logger.info(f"Executing: git commit -m '{message}'")
                commit_result = subprocess.run(["git", "commit", "-m", message], cwd=repo_dir, capture_output=True, text=True)
                logger.info(f"Git commit output: {commit_result.stdout}")
                if commit_result.stderr:
                    logger.warning(f"Git commit stderr: {commit_result.stderr}")
                if commit_result.returncode != 0:
                    logger.error(f"❌ Git commit failed: {commit_result.stderr}")
                    return False
                logger.info("✅ Git commit completed successfully")
                # Estimate repository size (excluding build artifacts) to warn
                # about pushes likely to hit server upload limits.
                logger.info("Checking repository size before push")
                total_size = 0
                # Regex patterns for files/dirs excluded from the size count.
                exclude_patterns = [
                    r'node_modules[/\\]', r'dist[/\\]', r'build[/\\]', r'target[/\\]', r'out[/\\]',
                    r'\.next[/\\]', r'\.nuxt[/\\]', r'__pycache__[/\\]', r'\.pytest_cache[/\\]',
                    r'\.cache[/\\]', r'\.temp[/\\]', r'\.tmp[/\\]', r'\.idea[/\\]', r'\.vscode[/\\]',
                    r'\.vs[/\\]', r'\.git[/\\]', r'\.svn[/\\]', r'\.hg[/\\]',
                    r'\.log$', r'\.DS_Store$', r'Thumbs\.db$', r'desktop\.ini$',
                ]
                for dirpath, dirnames, filenames in os.walk(repo_dir):
                    # Skip anything under .git.
                    if '.git' in dirpath:
                        continue
                    # NOTE(review): this tests the regex *source strings* as
                    # literal substrings of dirpath (e.g. 'node_modules[/\\]'),
                    # so directory exclusion will rarely match — likely meant
                    # re.search here as well; confirm intent.
                    should_skip = any(pattern in dirpath for pattern in exclude_patterns)
                    if should_skip:
                        continue
                    for filename in filenames:
                        filepath = os.path.join(dirpath, filename)
                        if not filepath.startswith(os.path.join(repo_dir, '.git')):
                            # File-level exclusion does use regex matching.
                            should_exclude = any(re.search(pattern, filename) for pattern in exclude_patterns)
                            if not should_exclude:
                                file_size = os.path.getsize(filepath)
                                total_size += file_size
                logger.info(f"Repository size (excluding build files): {total_size / (1024 * 1024):.2f} MB")
                # Hard cap: refuse to push repositories above 2 GB.
                MAX_REPO_SIZE = 2 * 1024 * 1024 * 1024  # 2GB total-size limit
                if total_size > MAX_REPO_SIZE:
                    error_msg = (
                        f"Repository size ({total_size / (1024 * 1024):.2f} MB) exceeds maximum allowed size (2 GB).\n"
                        f"Please remove large files (videos, models, datasets) and try again."
                    )
                    logger.error(f"{error_msg}")
                    return False
                if total_size > 500 * 1024 * 1024:  # 500MB: warn about likely HTTP 413
                    logger.warning(f"Repository is large: {total_size / (1024 * 1024):.2f} MB")
                    logger.warning("This may cause HTTP 413 errors on push")
                # Tune git for large pushes: bigger HTTP buffer, no compression.
                logger.info("Setting Git http.postBuffer to 1GB")
                buffer_result = subprocess.run(["git", "config", "http.postBuffer", "1073741824"], cwd=repo_dir, capture_output=True, text=True)
                if buffer_result.returncode != 0:
                    logger.warning(f"Failed to set http.postBuffer: {buffer_result.stderr}")
                else:
                    logger.info("✅ Git http.postBuffer set successfully")
                logger.info("Disabling Git compression")
                compression_result = subprocess.run(["git", "config", "core.compression", "0"], cwd=repo_dir, capture_output=True, text=True)
                if compression_result.returncode != 0:
                    logger.warning(f"Failed to set core.compression: {compression_result.stderr}")
                else:
                    logger.info("✅ Git core.compression disabled successfully")
                # NOTE(review): this immediately overwrites the 1GB buffer set
                # above with 500MB — probably only one of the two was intended.
                logger.info("Setting additional Git configs for large repositories...")
                subprocess.run(["git", "config", "http.postBuffer", "524288000"], cwd=repo_dir)  # 500MB buffer
                subprocess.run(["git", "config", "pack.windowMemory", "128m"], cwd=repo_dir)  # Limit memory usage
                subprocess.run(["git", "config", "pack.packSizeLimit", "128m"], cwd=repo_dir)  # Limit pack size
                # Ask git which branch origin/HEAD points at; default to main.
                logger.info("Detecting default branch...")
                branch_result = subprocess.run(
                    ["git", "symbolic-ref", "refs/remotes/origin/HEAD"],
                    cwd=repo_dir,
                    capture_output=True,
                    text=True,
                    timeout=30
                )
                default_branch = "main"  # fall back to main when undetectable
                if branch_result.returncode == 0 and branch_result.stdout:
                    # Last path component of refs/remotes/origin/HEAD is the branch.
                    default_branch = branch_result.stdout.strip().split('/')[-1]
                    logger.info(f"Detected default branch: {default_branch}")
                else:
                    logger.warning("Could not detect default branch, using 'main' as default")
                # Push with verbose/progress flags.
                # NOTE(review): "--receive-pack='git receive-pack'" passes the
                # literal quotes to git (no shell here) — this argument is
                # almost certainly malformed; confirm whether it was ever needed.
                logger.info(f"Executing: git push with optimizations for large repositories to branch '{default_branch}'")
                push_result = subprocess.run([
                    "git", "push",
                    "--verbose",
                    "-u", "origin", default_branch,
                    "--receive-pack='git receive-pack'",  # Ensure proper receive pack
                    "--progress"  # Show progress for large pushes
                ], cwd=repo_dir, capture_output=True, text=True, timeout=300)  # 5 minute timeout
                logger.info(f"Git push output: {push_result.stdout}")
                if push_result.stderr:
                    logger.warning(f"Git push stderr: {push_result.stderr}")
                if push_result.returncode != 0:
                    # Classify the failure: size/limit problems get the SSH and
                    # staged-push fallbacks; other failures try the other branch.
                    error_msg = push_result.stderr.lower()
                    is_large_file_error = (
                        "http 413" in error_msg or
                        "payload too large" in error_msg or
                        "unpack failed" in error_msg or
                        "remote: fatal" in error_msg or
                        "cannot spawn" in error_msg or
                        "timeout" in error_msg
                    )
                    # Non-size failure: retry against the other common branch name.
                    if not is_large_file_error:
                        logger.warning(f"Failed to push to '{default_branch}', trying alternative branch...")
                        alt_branch = "master" if default_branch == "main" else "main"
                        push_result = subprocess.run([
                            "git", "push",
                            "--verbose",
                            "-u", "origin", alt_branch,
                            "--receive-pack='git receive-pack'",
                            "--progress"
                        ], cwd=repo_dir, capture_output=True, text=True, timeout=300)
                        if push_result.returncode == 0:
                            logger.info(f"✅ Git push completed successfully to '{alt_branch}'")
                            return True
                        else:
                            logger.error(f"Failed to push to '{alt_branch}' as well")
                    if is_large_file_error:
                        logger.error(f"❌ Git push failed likely due to repository size: {total_size / (1024 * 1024):.2f} MB")
                        logger.error(f"Error details: {push_result.stderr}")
                        logger.error("\n📋 解决方案建议:")
                        logger.error("1. 检查Gitea服务器配置增加MAX_UPLOAD_SIZE限制")
                        logger.error("2. 尝试使用SSH协议进行推送如果服务器支持")
                        logger.error("3. 优化仓库大小,移除不必要的大文件")
                        logger.error("4. 考虑使用Git LFSLarge File Storage管理大文件")
                        # SSH fallback: rewrite the HTTPS remote to SSH form,
                        # push again, then restore the HTTPS URL either way.
                        logger.info("\n🔄 尝试使用SSH协议进行推送...")
                        try:
                            remote_result = subprocess.run(["git", "remote", "get-url", "origin"], cwd=repo_dir, capture_output=True, text=True, timeout=30)
                            if remote_result.returncode == 0:
                                https_url = remote_result.stdout.strip()
                                if https_url.startswith("https://"):
                                    # NOTE(review): this replaces *every* ':' in
                                    # the URL (including one in a token) and
                                    # yields git@host/owner/repo.git rather than
                                    # the usual git@host:owner/repo.git — verify
                                    # the conversion against the server's SSH URL
                                    # format.
                                    ssh_url = https_url.replace("https://", "git@").replace(":", "/")
                                    logger.info(f"Converting HTTPS URL to SSH URL: {ssh_url}")
                                    set_url_result = subprocess.run(["git", "remote", "set-url", "origin", ssh_url], cwd=repo_dir, capture_output=True, text=True, timeout=30)
                                    if set_url_result.returncode == 0:
                                        logger.info("✅ Remote URL updated to SSH format")
                                        logger.info("Executing: git push with SSH and conservative parameters")
                                        ssh_push_result = subprocess.run([
                                            "git", "push",
                                            "--verbose",
                                            "-u", "origin", "main"
                                        ], cwd=repo_dir, capture_output=True, text=True, timeout=600)  # 10 minute timeout for SSH
                                        if ssh_push_result.returncode == 0:
                                            logger.info("✅ Git push completed successfully with SSH")
                                            # Restore the HTTPS remote before returning.
                                            reset_url_result = subprocess.run(["git", "remote", "set-url", "origin", https_url], cwd=repo_dir, capture_output=True, text=True, timeout=30)
                                            if reset_url_result.returncode != 0:
                                                logger.warning(f"Failed to reset remote URL to HTTPS: {reset_url_result.stderr}")
                                            return True
                                        else:
                                            logger.warning(f"SSH push failed: {ssh_push_result.stderr}")
                                            reset_url_result = subprocess.run(["git", "remote", "set-url", "origin", https_url], cwd=repo_dir, capture_output=True, text=True, timeout=30)
                                            if reset_url_result.returncode != 0:
                                                logger.warning(f"Failed to reset remote URL to HTTPS: {reset_url_result.stderr}")
                                            # Last resort: push in small batches.
                                            logger.info("\n🔄 尝试分阶段推送...")
                                            return self.push_repository_staged(repo_dir, https_url)
                            else:
                                logger.warning(f"Could not get remote URL: {remote_result.stderr}")
                        except subprocess.TimeoutExpired:
                            logger.warning("Remote URL command timed out")
                        except Exception as e:
                            logger.warning(f"Failed to try SSH push: {str(e)}")
                    else:
                        logger.error(f"❌ Git push failed: {push_result.stderr}")
                    return False
                logger.info("✅ Git push completed successfully")
            else:
                logger.info(" No changes to commit")
            logger.info(f"✅ Code pushed successfully for algorithm: {algorithm_id}")
            return True
        except Exception as e:
            logger.error(f"=== 推送代码失败 ===")
            logger.error(f"Error: {str(e)}")
            import traceback
            logger.error(f"Traceback: {traceback.format_exc()}")
            return False
def pull_from_repository(self, algorithm_id: str) -> bool:
"""从Gitea仓库拉取代码
Args:
algorithm_id: 算法ID
Returns:
是否拉取成功
"""
try:
repo_dir = f"/tmp/algorithms/{algorithm_id}"
if not os.path.exists(repo_dir):
logger.error(f"Repository directory not found: {repo_dir}")
return False
# 执行git pull命令
result = subprocess.run(
["git", "pull"],
cwd=repo_dir,
capture_output=True,
text=True
)
if result.returncode == 0:
logger.info(f"Code pulled successfully for algorithm: {algorithm_id}")
return True
else:
logger.error(f"Failed to pull code: {result.stderr}")
return False
except Exception as e:
logger.error(f"Failed to pull code: {str(e)}")
return False
    def push_repository_staged(self, repo_dir: str, origin_url: str) -> bool:
        """
        Push a very large repository in stages.

        Files are sorted by size; oversized files (> 50MB) get their own
        commit and push, the rest are grouped into batches of at most 50
        files / 50MB and pushed via ``push_batch``.

        Args:
            repo_dir: Path of the local git working copy.
            origin_url: HTTPS URL of the remote.
                NOTE(review): only forwarded to push_batch, which does not use
                it either — the remote actually pushed is whatever "origin"
                points at; confirm this is intended.

        Returns:
            True when every batch pushed successfully.
        """
        try:
            import subprocess
            import os
            logger.info("=== 开始分阶段推送仓库 ===")
            logger.info(f"Repository directory: {repo_dir}")
            # Collect every tracked-candidate file, skipping .git content.
            all_files = []
            for root, dirs, files in os.walk(repo_dir):
                if '.git' in root:
                    continue
                for file in files:
                    file_path = os.path.relpath(os.path.join(root, file), repo_dir)
                    if file_path.startswith('.git'):
                        continue
                    all_files.append(file_path)
            logger.info(f"Total files to stage: {len(all_files)}")
            # Size helper; unreadable files count as 0 so they sort first.
            def get_file_size(file_path):
                try:
                    return os.path.getsize(os.path.join(repo_dir, file_path))
                except:
                    return 0
            # Smallest files first, so early batches are cheap.
            sorted_files = sorted(all_files, key=get_file_size)
            # Batch limits: at most 50 files or 50MB per batch.
            batch_size_limit = 50
            batch_size_bytes = 50 * 1024 * 1024  # 50MB
            current_batch = []
            current_batch_size = 0
            batch_number = 1
            for file_path in sorted_files:
                file_full_path = os.path.join(repo_dir, file_path)
                file_size = get_file_size(file_path)
                # Oversized file: commit and push it on its own. Failures here
                # are logged but do not abort the whole staged push.
                if file_size > batch_size_bytes:
                    logger.info(f"Handling large file separately: {file_path} ({file_size / (1024*1024):.2f}MB)")
                    add_result = subprocess.run(["git", "add", file_path], cwd=repo_dir, capture_output=True, text=True)
                    if add_result.returncode != 0:
                        logger.error(f"Failed to add large file {file_path}: {add_result.stderr}")
                        continue
                    # Only commit when staging actually produced changes.
                    status_result = subprocess.run(["git", "status", "--porcelain"], cwd=repo_dir, capture_output=True, text=True)
                    if status_result.stdout.strip():
                        commit_msg = f"Add large file: {file_path}"
                        commit_result = subprocess.run(["git", "commit", "-m", commit_msg], cwd=repo_dir, capture_output=True, text=True)
                        if commit_result.returncode == 0:
                            logger.info(f"Committed large file: {file_path}")
                            # NOTE(review): branch hard-coded to "main" here,
                            # unlike push_to_repository's detection — confirm.
                            push_result = subprocess.run([
                                "git", "push", "--verbose", "origin", "main"
                            ], cwd=repo_dir, capture_output=True, text=True, timeout=300)
                            if push_result.returncode != 0:
                                logger.warning(f"Push failed for large file {file_path}: {push_result.stderr}")
                                # Unstage the file so later batches are clean.
                                subprocess.run(["git", "reset", "HEAD", file_path], cwd=repo_dir, capture_output=True, text=True)
                            else:
                                logger.info(f"Successfully pushed large file: {file_path}")
                        else:
                            logger.error(f"Failed to commit large file {file_path}: {commit_result.stderr}")
                else:
                    # Flush the current batch when adding this file would
                    # exceed either limit, then start a new one.
                    if (len(current_batch) >= batch_size_limit or
                        current_batch_size + file_size > batch_size_bytes):
                        if current_batch:
                            logger.info(f"Pushing batch {batch_number} with {len(current_batch)} files...")
                            success = self.push_batch(repo_dir, current_batch, batch_number, origin_url)
                            if not success:
                                logger.error(f"Failed to push batch {batch_number}")
                                return False
                            batch_number += 1
                            current_batch = []
                            current_batch_size = 0
                    current_batch.append(file_path)
                    current_batch_size += file_size
            # Push whatever remains in the last partial batch.
            if current_batch:
                logger.info(f"Pushing final batch {batch_number} with {len(current_batch)} files...")
                success = self.push_batch(repo_dir, current_batch, batch_number, origin_url)
                if not success:
                    logger.error(f"Failed to push final batch {batch_number}")
                    return False
            logger.info("✅ 分阶段推送完成")
            return True
        except Exception as e:
            logger.error(f"❌ 分阶段推送失败: {str(e)}")
            import traceback
            logger.error(f"Traceback: {traceback.format_exc()}")
            return False
def push_batch(self, repo_dir: str, file_batch: list, batch_num: int, origin_url: str) -> bool:
"""
推送文件批次
"""
try:
import subprocess
logger.info(f"Processing batch {batch_num}: {len(file_batch)} files")
# 添加批次中的文件
for file_path in file_batch:
add_result = subprocess.run(["git", "add", file_path], cwd=repo_dir, capture_output=True, text=True)
if add_result.returncode != 0:
logger.error(f"Failed to add file {file_path}: {add_result.stderr}")
return False
# 检查是否有更改需要提交
status_result = subprocess.run(["git", "status", "--porcelain"], cwd=repo_dir, capture_output=True, text=True)
if not status_result.stdout.strip():
logger.info(f"No changes in batch {batch_num}")
return True
# 提交批次
commit_result = subprocess.run([
"git", "commit", "-m", f"Batch {batch_num}: Add {len(file_batch)} files"
], cwd=repo_dir, capture_output=True, text=True)
if commit_result.returncode != 0:
logger.warning(f"Commit failed or no changes for batch {batch_num}: {commit_result.stderr}")
# 即使没有更改,也可能正常(比如文件没变)
# 推送批次
push_result = subprocess.run([
"git", "push", "--verbose", "origin", "main"
], cwd=repo_dir, capture_output=True, text=True, timeout=300)
if push_result.returncode == 0:
logger.info(f"✅ Batch {batch_num} pushed successfully")
return True
else:
logger.error(f"❌ Batch {batch_num} push failed: {push_result.stderr}")
return False
except subprocess.TimeoutExpired:
logger.error(f"❌ Batch {batch_num} push timed out")
return False
except Exception as e:
logger.error(f"❌ Batch {batch_num} push failed with error: {str(e)}")
return False
def get_repository_info(self, repo_owner: str, repo_name: str) -> Optional[Dict[str, Any]]:
"""获取仓库信息
Args:
repo_owner: 仓库所有者
repo_name: 仓库名称
Returns:
仓库信息
"""
if not self.client:
return None
return self.client.get_repository(repo_owner, repo_name)
def list_repositories(self, owner: Optional[str] = None) -> Optional[List[Dict[str, Any]]]:
"""列出仓库
Args:
owner: 所有者(用户或组织)
Returns:
仓库列表
"""
if not self.client:
return None
target_owner = owner or self.config.get('default_owner')
if not target_owner:
return None
return self.client.list_repositories(target_owner)
def register_algorithm_from_repo(self, repo_owner: str, repo_name: str, algorithm_id: str) -> bool:
"""从仓库注册算法服务
Args:
repo_owner: 仓库所有者
repo_name: 仓库名称
algorithm_id: 算法ID
Returns:
是否注册成功
"""
try:
# 这里应该实现从仓库注册算法服务的逻辑
# 1. 克隆仓库
# 2. 扫描仓库中的算法代码
# 3. 注册算法服务
logger.info(f"Algorithm registered from repo: {repo_owner}/{repo_name}")
return True
except Exception as e:
logger.error(f"Failed to register algorithm from repo: {str(e)}")
return False
# 递归遍历目录中的所有文件
for root, dirs, files in os.walk(repo_dir):
# 跳过 .git 目录
if '.git' in root:
continue
for file in files:
file_path = os.path.relpath(os.path.join(root, file), repo_dir)
if file_path.startswith('.git'):
continue
full_file_path = os.path.join(root, file)
# 读取文件内容并进行base64编码
try:
with open(full_file_path, 'rb') as f:
file_content = f.read()
encoded_content = base64.b64encode(file_content).decode('utf-8')
# 使用Gitea API创建或更新文件
if self.client:
# 移除开头的./,如果有的话
clean_path = file_path.lstrip('./\\')
result = self.client.create_file(
self.config["default_owner"],
algorithm_id,
clean_path,
encoded_content,
f"{message} - Upload {clean_path}"
)
if result:
logger.info(f"✅ File uploaded via API: {clean_path}")
else:
logger.error(f"❌ Failed to upload file via API: {clean_path}")
return False
else:
logger.error("❌ Gitea client not initialized")
return False
except Exception as e:
logger.error(f"❌ Error processing file {file_path}: {str(e)}")
return False
logger.info(f"✅ All files uploaded successfully via API for algorithm: {algorithm_id}")
return True
except Exception as e:
logger.error(f"❌ Failed to upload files via API: {str(e)}")
import traceback
logger.error(f"Traceback: {traceback.format_exc()}")
return False
    def upload_files_via_api(self, algorithm_id: str, message: str = "Update code") -> bool:
        """Upload the algorithm's files to its Gitea repository via the Git
        CLI (clone into a temp dir, copy files in, commit, push).

        Despite the name, this implementation shells out to git rather than
        using the HTTP content API.

        Args:
            algorithm_id: Algorithm identifier (may already carry the
                configured repository prefix).
            message: Commit message.

        Returns:
            True on success (including "nothing to commit"), False otherwise.
        """
        try:
            import os
            import subprocess
            # Derive the repository name the same way create_repository does.
            repo_prefix = self.config.get("repo_prefix", "")
            if repo_prefix and algorithm_id.startswith(repo_prefix):
                # Id already carries the prefix: use it verbatim.
                repo_name = algorithm_id
                logger.info(f"Algorithm ID already contains prefix: {repo_prefix}")
            else:
                repo_name = f"{repo_prefix}{algorithm_id}" if repo_prefix else algorithm_id
                logger.info(f"Generated repository name: {repo_name}")
            temp_repo_dir = f"/tmp/algorithms/{algorithm_id}_temp"
            logger.info(f"Uploading files via Git clone/push for algorithm: {algorithm_id}")
            logger.info(f"Repository name: {repo_name}")
            logger.info(f"Temp repository directory: {temp_repo_dir}")
            # Start from a clean temp checkout.
            if os.path.exists(temp_repo_dir):
                import shutil
                shutil.rmtree(temp_repo_dir)
            # Clone with the access token embedded in the URL.
            # NOTE(review): the token-bearing URL is written to the log on the
            # next line — secret leakage; consider redacting.
            server_url = self.config["server_url"].replace("https://", "").replace("http://", "")
            auth_repo_url = f"https://{self.config['access_token']}@{server_url}/{self.config['default_owner']}/{repo_name}.git"
            logger.info(f"Cloning repository: {auth_repo_url}")
            clone_result = subprocess.run(["git", "clone", auth_repo_url, temp_repo_dir], capture_output=True, text=True)
            if clone_result.returncode != 0:
                logger.error(f"❌ Failed to clone repository: {clone_result.stderr}")
                return False
            logger.info("✅ Repository cloned successfully")
            # The algorithm's working copy to upload from.
            local_repo_dir = f"/tmp/algorithms/{algorithm_id}"
            if not os.path.exists(local_repo_dir):
                logger.error(f"❌ Local repository directory not found: {local_repo_dir}")
                return False
            # Copy everything (except .git content) into the fresh clone.
            import shutil
            copied_files = []
            for root, dirs, files in os.walk(local_repo_dir):
                if ".git" in root:
                    continue
                for file in files:
                    src_path = os.path.join(root, file)
                    rel_path = os.path.relpath(src_path, local_repo_dir)
                    dest_path = os.path.join(temp_repo_dir, rel_path)
                    # Make sure the destination subdirectory exists.
                    os.makedirs(os.path.dirname(dest_path), exist_ok=True)
                    shutil.copy2(src_path, dest_path)
                    copied_files.append(rel_path)
            logger.info(f"Copied {len(copied_files)} files")
            # Stage everything in the clone.
            add_result = subprocess.run(["git", "add", "."], cwd=temp_repo_dir, capture_output=True, text=True)
            if add_result.returncode != 0:
                logger.error(f"❌ Failed to add files: {add_result.stderr}")
                return False
            # Commit identity for the automated commit.
            subprocess.run(["git", "config", "user.email", "system@algorithm-showcase.com"], cwd=temp_repo_dir)
            subprocess.run(["git", "config", "user.name", "Algorithm Showcase System"], cwd=temp_repo_dir)
            # Commit; "nothing to commit" is treated as success, any other
            # commit failure aborts.
            commit_result = subprocess.run(["git", "commit", "-m", message], cwd=temp_repo_dir, capture_output=True, text=True)
            if commit_result.returncode != 0 and "nothing to commit" not in commit_result.stdout:
                logger.error(f"❌ Failed to commit changes: {commit_result.stderr}")
                return False
            elif "nothing to commit" in commit_result.stdout:
                logger.info(" No changes to commit")
                return True
            # Ask git which branch origin/HEAD points at; default to main.
            logger.info("Detecting default branch...")
            branch_result = subprocess.run(
                ["git", "symbolic-ref", "refs/remotes/origin/HEAD"],
                cwd=temp_repo_dir,
                capture_output=True,
                text=True
            )
            default_branch = "main"  # fall back to main when undetectable
            if branch_result.returncode == 0 and branch_result.stdout:
                # Last path component of refs/remotes/origin/HEAD is the branch.
                default_branch = branch_result.stdout.strip().split('/')[-1]
                logger.info(f"Detected default branch: {default_branch}")
            else:
                logger.warning("Could not detect default branch, trying 'main' and 'master'")
            # Push to the detected branch, falling back to master when the
            # default was main and the push failed.
            push_result = subprocess.run(
                ["git", "push", "-u", "origin", default_branch],
                cwd=temp_repo_dir,
                capture_output=True,
                text=True
            )
            if push_result.returncode != 0:
                logger.warning(f"Failed to push to '{default_branch}': {push_result.stderr}")
                if default_branch == "main":
                    logger.info("Trying 'master' branch instead...")
                    push_result = subprocess.run(
                        ["git", "push", "-u", "origin", "master"],
                        cwd=temp_repo_dir,
                        capture_output=True,
                        text=True
                    )
                    if push_result.returncode != 0:
                        logger.error(f"❌ Failed to push to master branch: {push_result.stderr}")
                        return False
                else:
                    logger.error(f"❌ Failed to push to '{default_branch}': {push_result.stderr}")
                    return False
            logger.info(f"✅ All files uploaded successfully via Git for algorithm: {algorithm_id}")
            # Remove the temp clone on success only; failure paths above leave
            # it behind for inspection.
            if os.path.exists(temp_repo_dir):
                shutil.rmtree(temp_repo_dir)
            return True
        except Exception as e:
            logger.error(f"❌ Failed to upload files via Git: {str(e)}")
            import traceback
            logger.error(f"Traceback: {traceback.format_exc()}")
            return False
def verify_push(self, algorithm_id: str) -> bool:
    """Check that pushed code is actually present in the Gitea repository.

    The repository name is derived exactly the same way as in
    ``create_repository`` (optional prefix handling), then the repo is
    looked up and required to contain at least one file.

    Args:
        algorithm_id: Algorithm identifier the repository name derives from.

    Returns:
        True when the repository exists and holds files, or when the check
        is skipped / raises (a verification error must not void a push);
        False when the repository is missing or empty.
    """
    try:
        # Nothing to verify against without a configured client/owner.
        if not self.client:
            logger.warning("Gitea client not initialized, skipping verification")
            return True
        if not self.config.get('default_owner'):
            logger.warning("Default owner not set, skipping verification")
            return True
        owner = self.config['default_owner']
        # Derive the repo name consistently with create_repository:
        # keep algorithm_id as-is when it already carries the prefix.
        prefix = self.config.get("repo_prefix", "")
        if prefix and not algorithm_id.startswith(prefix):
            repo_name = f"{prefix}{algorithm_id}"
        else:
            repo_name = algorithm_id
        logger.info(f"Verifying push for algorithm: {algorithm_id} (repo: {repo_name})")
        # The repository itself must exist...
        repo_info = self.client.get_repository(owner, repo_name)
        if not repo_info:
            logger.error(f"Repository not found: {repo_name}")
            return False
        # ...and must not be empty.
        files = self.client.get_repository_files(owner, repo_name)
        if not files:
            logger.warning(f"Repository exists but has no files")
            return False
        logger.info(f"✅ Push verified successfully - Repository has {len(files)} files")
        return True
    except Exception as e:
        logger.error(f"Failed to verify push: {str(e)}")
        import traceback
        logger.error(f"Traceback: {traceback.format_exc()}")
        # Deliberate: a broken verification must not invalidate the push result.
        return True
def update_repository_info(self, algorithm_id: str, name: Optional[str] = None, description: Optional[str] = None, private: Optional[bool] = None) -> Optional[Dict[str, Any]]:
    """Update metadata of the Gitea repository backing an algorithm.

    Args:
        algorithm_id: Algorithm identifier the repository name derives from.
        name: New repository name, if it should change.
        description: New repository description, if it should change.
        private: New visibility flag, if it should change.

    Returns:
        The updated repository info on success, otherwise ``None``.
    """
    try:
        # Bail out early when the service is not usable.
        if not self.client:
            logger.error("Gitea client not initialized. Please check your Gitea configuration.")
            return None
        if not self.config.get('default_owner'):
            logger.error("Default owner not set in Gitea configuration.")
            return None
        # Derive the repo name consistently with create_repository:
        # only prepend the prefix when it is set and not already present.
        prefix = self.config.get("repo_prefix", "")
        if prefix and not algorithm_id.startswith(prefix):
            repo_name = f"{prefix}{algorithm_id}"
        else:
            repo_name = algorithm_id
        logger.info(f"Updating repository info for: {repo_name}")
        logger.info(f"New name: {name}, new description: {description}, private: {private}")
        # Delegate the actual PATCH to the API client.
        updated_repo = self.client.update_repository(
            self.config['default_owner'],
            repo_name,
            name=name,
            description=description,
            private=private
        )
        if not updated_repo:
            logger.error(f"❌ Failed to update repository info: {repo_name}")
            return None
        logger.info(f"✅ Repository info updated successfully: {repo_name}")
        return updated_repo
    except Exception as e:
        logger.error(f"❌ Failed to update repository info: {str(e)}")
        import traceback
        logger.error(f"Traceback: {traceback.format_exc()}")
        return None
def delete_repository(self, algorithm_id: str) -> bool:
    """Delete the Gitea repository backing an algorithm.

    Args:
        algorithm_id: Algorithm identifier the repository name derives from.

    Returns:
        True when the repository was deleted, False otherwise.
    """
    try:
        # Bail out early when the service is not usable.
        if not self.client:
            logger.error("Gitea client not initialized. Please check your Gitea configuration.")
            return False
        if not self.config.get('default_owner'):
            logger.error("Default owner not set in Gitea configuration.")
            return False
        # Derive the repo name consistently with create_repository:
        # only prepend the prefix when it is set and not already present.
        prefix = self.config.get("repo_prefix", "")
        if prefix and not algorithm_id.startswith(prefix):
            repo_name = f"{prefix}{algorithm_id}"
        else:
            repo_name = algorithm_id
        logger.info(f"Deleting repository: {repo_name}")
        # Delegate the actual DELETE to the API client.
        if self.client.delete_repository(self.config['default_owner'], repo_name):
            logger.info(f"✅ Repository deleted successfully: {repo_name}")
            return True
        logger.error(f"❌ Failed to delete repository: {repo_name}")
        return False
    except Exception as e:
        logger.error(f"❌ Failed to delete repository: {str(e)}")
        import traceback
        logger.error(f"Traceback: {traceback.format_exc()}")
        return False
# Global Gitea service singleton.
# NOTE: instantiated at import time — __init__ loads config via SessionLocal,
# so importing this module triggers a database query.
gitea_service = GiteaService()