first commit

This commit was created on 2026-02-08 14:42:58 +08:00.
commit 20e1deae21
8197 changed files with 2264639 additions and 0 deletions

308
backend/app/utils/logger.py Normal file
View File

@@ -0,0 +1,308 @@
"""日志管理工具,提供结构化日志记录和日志查询功能"""
import logging
from logging.handlers import RotatingFileHandler
import json
from datetime import datetime, timedelta
from typing import Dict, Any, List, Optional
import os
from pathlib import Path
from app.config.settings import settings
class StructuredLogger:
    """Structured logger that writes JSON-formatted event records.

    Each record is a single JSON object embedded in a standard
    ``asctime - name - levelname - message`` line, written to a
    size-rotated file under *log_dir* (this line format is what
    ``LogQuery._parse_log_line`` expects to read back).
    """

    def __init__(self, name: str = "algorithm_showcase", log_dir: str = "logs"):
        """Create (or reuse) the named logger and attach a rotating file handler.

        Args:
            name: Logger name; also used as the log file's base name.
            log_dir: Directory for log files; created if missing.
        """
        self.logger = logging.getLogger(name)
        self.logger.setLevel(logging.INFO)
        # Only build the handler when the logger has none yet: constructing
        # a RotatingFileHandler opens the log file immediately, so creating
        # one unconditionally (as the original code did) leaked an open file
        # handle every time an already-configured logger name was reused.
        if not self.logger.handlers:
            log_path = Path(log_dir)
            log_path.mkdir(exist_ok=True)
            handler = RotatingFileHandler(
                str(log_path / f"{name}.log"),
                maxBytes=10 * 1024 * 1024,  # rotate at 10MB
                backupCount=5,
            )
            handler.setFormatter(logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            ))
            self.logger.addHandler(handler)

    def log_event(self, event_type: str, user_id: Optional[str] = None,
                  algorithm_id: Optional[str] = None,
                  extra_data: Optional[Dict[str, Any]] = None,
                  level: int = logging.INFO):
        """Record one structured event as a JSON log line.

        Args:
            event_type: Category tag stored under ``event_type``.
            user_id: Acting user, if any.
            algorithm_id: Related algorithm, if any.
            extra_data: Arbitrary extra fields (JSON-serialized; non-JSON
                values are stringified via ``default=str``).
            level: stdlib logging level for the record.
        """
        log_data = {
            # NOTE(review): utcnow() is naive and deprecated in 3.12+;
            # kept for format compatibility with existing log consumers.
            "timestamp": datetime.utcnow().isoformat(),
            "event_type": event_type,
            "user_id": user_id,
            "algorithm_id": algorithm_id,
            "extra_data": extra_data or {}
        }
        message = json.dumps(log_data, ensure_ascii=False, default=str)
        self.logger.log(level, message)

    def log_api_call(self, user_id: str, algorithm_id: str, version_id: str,
                     input_size: int, response_time: float, success: bool,
                     error_msg: Optional[str] = None):
        """Record an API call; failures are logged at ERROR level."""
        extra_data = {
            "version_id": version_id,
            "input_size": input_size,
            "response_time": response_time,
            "success": success
        }
        if error_msg:
            extra_data["error"] = error_msg
        self.log_event(
            event_type="api_call",
            user_id=user_id,
            algorithm_id=algorithm_id,
            extra_data=extra_data,
            level=logging.INFO if success else logging.ERROR
        )

    def log_algorithm_execution(self, user_id: str, algorithm_id: str, version_id: str,
                                input_data: Dict[str, Any], output_data: Dict[str, Any],
                                execution_time: float, success: bool,
                                error_msg: Optional[str] = None):
        """Record an algorithm run; payloads are summarized, not stored verbatim."""
        extra_data = {
            "version_id": version_id,
            "execution_time": execution_time,
            # Summaries only — raw payloads may contain private data.
            "input_summary": self._summarize_data(input_data),
            "output_summary": self._summarize_data(output_data),
            "success": success
        }
        if error_msg:
            extra_data["error"] = error_msg
        self.log_event(
            event_type="algorithm_execution",
            user_id=user_id,
            algorithm_id=algorithm_id,
            extra_data=extra_data,
            level=logging.INFO if success else logging.ERROR
        )

    def log_system_event(self, event_subtype: str, severity: str, message: str,
                         extra_data: Optional[Dict[str, Any]] = None):
        """Record a system-level event at the level implied by *severity*.

        Note: *message* is carried only via ``extra_data`` the caller passes;
        the parameter itself is currently unused (kept for interface
        compatibility).
        """
        extra_data = extra_data or {}
        extra_data["subtype"] = event_subtype
        extra_data["severity"] = severity
        self.log_event(
            event_type="system_event",
            extra_data=extra_data,
            level=self._severity_to_level(severity)
        )

    def _summarize_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Produce a privacy-preserving summary of *data*.

        Non-dict input is reduced to its type name and stringified length.
        For dicts, only the first 5 items are kept; nested containers are
        replaced by a type placeholder and scalars truncated to 100 chars.
        """
        if not isinstance(data, dict):
            # str() of anything always supports len(); the original
            # hasattr(str(data), '__len__') guard was dead code.
            return {"type": type(data).__name__, "size": len(str(data))}
        summary = {}
        for key, value in list(data.items())[:5]:  # cap at 5 entries
            if isinstance(value, (dict, list)):
                summary[key] = f"<{type(value).__name__}>"
            else:
                summary[key] = str(value)[:100]  # cap scalar length
        return summary

    def _severity_to_level(self, severity: str) -> int:
        """Map a severity name (case-insensitive) to a logging level.

        Unknown names fall back to ``logging.INFO``.
        """
        severity_map = {
            "debug": logging.DEBUG,
            "info": logging.INFO,
            "warning": logging.WARNING,
            "error": logging.ERROR,
            "critical": logging.CRITICAL
        }
        return severity_map.get(severity.lower(), logging.INFO)
class LogQuery:
    """Search and aggregate the JSON log lines written by StructuredLogger."""

    # Single source of truth for name -> numeric level mapping; the
    # original code duplicated this dict verbatim in search_logs and
    # _matches_filters.
    _LEVEL_MAP = {
        "DEBUG": logging.DEBUG,
        "INFO": logging.INFO,
        "WARNING": logging.WARNING,
        "ERROR": logging.ERROR,
        "CRITICAL": logging.CRITICAL
    }

    def __init__(self, log_dir: str = "logs"):
        """Remember the directory whose ``*.log`` files will be searched."""
        self.log_dir = Path(log_dir)

    def search_logs(self,
                    start_date: Optional[datetime] = None,
                    end_date: Optional[datetime] = None,
                    event_types: Optional[List[str]] = None,
                    user_ids: Optional[List[str]] = None,
                    algorithm_ids: Optional[List[str]] = None,
                    log_levels: Optional[List[str]] = None,
                    limit: int = 100) -> List[Dict[str, Any]]:
        """Return up to *limit* parsed log entries matching all given filters.

        Args:
            start_date / end_date: Inclusive time window (naive datetimes,
                compared against the line's asctime timestamp).
            event_types / user_ids / algorithm_ids: Allow-lists; ``None``
                means "no filtering on this field".
            log_levels: Level names (e.g. ``["ERROR"]``); unknown names are
                silently dropped.
            limit: Maximum number of results.
        """
        results = []
        # Newest files first so recent entries win under the limit.
        log_files = sorted(self.log_dir.glob("*.log"), reverse=True)
        # Pre-convert level names to numeric levels once, outside the loop.
        if log_levels:
            log_levels = [self._LEVEL_MAP[name.upper()]
                          for name in log_levels
                          if name.upper() in self._LEVEL_MAP]
        for log_file in log_files:
            try:
                with open(log_file, 'r', encoding='utf-8') as f:
                    for line in f:
                        try:
                            parsed_line = self._parse_log_line(line.strip())
                            if parsed_line and self._matches_filters(
                                parsed_line, start_date, end_date, event_types,
                                user_ids, algorithm_ids, log_levels
                            ):
                                results.append(parsed_line)
                                if len(results) >= limit:
                                    return results
                        except Exception:
                            continue  # skip unparseable lines
            except Exception as e:
                # Best-effort: an unreadable file must not abort the search.
                print(f"Error reading log file {log_file}: {e}")
        return results

    def _parse_log_line(self, line: str) -> Optional[Dict[str, Any]]:
        """Parse one ``asctime - name - level - message`` line.

        If the message is a JSON object it is decoded and the envelope
        fields (timestamp, logger, level) are merged in; otherwise a basic
        structure with the raw message is returned. Returns ``None`` for
        lines that don't match the layout.
        """
        try:
            # maxsplit=3: the JSON message may itself contain " - ".
            parts = line.split(" - ", 3)
            if len(parts) >= 4:
                timestamp_str, logger_name, level, message = parts
                if message.startswith('{') and message.endswith('}'):
                    log_data = json.loads(message)
                    # Envelope fields override any same-named JSON keys.
                    log_data["timestamp"] = timestamp_str
                    log_data["logger"] = logger_name
                    log_data["level"] = level
                    return log_data
                else:
                    return {
                        "timestamp": timestamp_str,
                        "logger": logger_name,
                        "level": level,
                        "message": message,
                        "raw_line": line
                    }
        except Exception:
            pass
        return None

    def _matches_filters(self, log_entry: Dict[str, Any],
                         start_date: Optional[datetime], end_date: Optional[datetime],
                         event_types: Optional[List[str]], user_ids: Optional[List[str]],
                         algorithm_ids: Optional[List[str]],
                         log_levels: Optional[List[int]]) -> bool:
        """Return True if *log_entry* passes every non-None filter."""
        if start_date or end_date:
            try:
                # NOTE(review): asctime uses a comma before milliseconds,
                # which fromisoformat rejects on Python < 3.11 — such
                # entries then bypass the time filter (original behavior).
                entry_time = datetime.fromisoformat(
                    log_entry.get("timestamp", "").replace("Z", "+00:00"))
                if start_date and entry_time < start_date:
                    return False
                if end_date and entry_time > end_date:
                    return False
            except ValueError:
                pass  # unparseable timestamp: skip the time check
        if event_types and log_entry.get("event_type") not in event_types:
            return False
        if user_ids and log_entry.get("user_id") not in user_ids:
            return False
        if algorithm_ids and log_entry.get("algorithm_id") not in algorithm_ids:
            return False
        if log_levels:
            entry_level = self._LEVEL_MAP.get(log_entry.get("level", "").upper())
            if entry_level not in log_levels:
                return False
        return True

    def get_log_stats(self, days: int = 7) -> Dict[str, Any]:
        """Aggregate counts over the last *days* days of logs.

        Returns a dict with totals broken down by event type, level and
        day, plus an ERROR/CRITICAL count. Bounded by search_logs' default
        limit of 100 entries.
        """
        start_date = datetime.utcnow() - timedelta(days=days)
        logs = self.search_logs(start_date=start_date)
        stats = {
            "total_logs": len(logs),
            "by_event_type": {},
            "by_level": {},
            "by_day": {},
            "error_count": 0
        }
        for log in logs:
            event_type = log.get("event_type", "unknown")
            stats["by_event_type"][event_type] = stats["by_event_type"].get(event_type, 0) + 1
            level = log.get("level", "UNKNOWN")
            stats["by_level"][level] = stats["by_level"].get(level, 0) + 1
            day = log.get("timestamp", "")[:10]  # YYYY-MM-DD prefix
            if day:
                stats["by_day"][day] = stats["by_day"].get(day, 0) + 1
            if log.get("level") in ["ERROR", "CRITICAL"]:
                stats["error_count"] += 1
        return stats
# Module-level singleton instances shared by the rest of the application.
structured_logger = StructuredLogger()
log_query = LogQuery()