308 lines
11 KiB
Python
308 lines
11 KiB
Python
"""日志管理工具,提供结构化日志记录和日志查询功能"""
|
||
|
||
import logging
|
||
from logging.handlers import RotatingFileHandler
|
||
import json
|
||
from datetime import datetime, timedelta
|
||
from typing import Dict, Any, List, Optional
|
||
import os
|
||
from pathlib import Path
|
||
|
||
from app.config.settings import settings
|
||
|
||
|
||
class StructuredLogger:
    """Structured logger that emits one JSON payload per log line.

    Records are written through a rotating file handler in the format
    ``asctime - logger name - level - <JSON>``; the companion ``LogQuery``
    class parses that exact layout back.
    """

    def __init__(self, name: str = "algorithm_showcase", log_dir: str = "logs"):
        """Create (or reuse) the named logger with a rotating file handler.

        Args:
            name: Logger name, also used as the log file stem.
            log_dir: Directory for log files; created if missing.
        """
        self.logger = logging.getLogger(name)
        self.logger.setLevel(logging.INFO)

        # Ensure the log directory exists (parents=True also handles
        # nested paths like "var/logs"; the original only created one level).
        log_path = Path(log_dir)
        log_path.mkdir(parents=True, exist_ok=True)

        # Rotating file handler: 10 MB per file, keep 5 backups.
        log_file = log_path / f"{name}.log"
        handler = RotatingFileHandler(
            str(log_file),
            maxBytes=10 * 1024 * 1024,  # 10MB
            backupCount=5
        )

        # LogQuery._parse_log_line depends on this " - "-separated layout.
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )
        handler.setFormatter(formatter)

        # getLogger() returns a shared instance; only attach a handler once
        # so repeated construction does not duplicate log lines.
        if not self.logger.handlers:
            self.logger.addHandler(handler)

    def log_event(self, event_type: str, user_id: Optional[str] = None,
                  algorithm_id: Optional[str] = None,
                  extra_data: Optional[Dict[str, Any]] = None,
                  level: int = logging.INFO) -> None:
        """Log a structured event as a single JSON line.

        Args:
            event_type: Category tag, e.g. "api_call" or "system_event".
            user_id / algorithm_id: Optional correlation identifiers.
            extra_data: Arbitrary additional payload (defaults to {}).
            level: Logging level for the record.
        """
        log_data = {
            # NOTE(review): utcnow() is naive UTC (deprecated in 3.12);
            # kept for backward-compatible timestamps without a "+00:00" suffix.
            "timestamp": datetime.utcnow().isoformat(),
            "event_type": event_type,
            "user_id": user_id,
            "algorithm_id": algorithm_id,
            "extra_data": extra_data or {}
        }

        # default=str keeps logging robust when values are not JSON-serializable.
        message = json.dumps(log_data, ensure_ascii=False, default=str)
        self.logger.log(level, message)

    def log_api_call(self, user_id: str, algorithm_id: str, version_id: str,
                     input_size: int, response_time: float, success: bool,
                     error_msg: Optional[str] = None) -> None:
        """Log an API call; failures are logged at ERROR level."""
        extra_data = {
            "version_id": version_id,
            "input_size": input_size,
            "response_time": response_time,
            "success": success
        }

        if error_msg:
            extra_data["error"] = error_msg

        self.log_event(
            event_type="api_call",
            user_id=user_id,
            algorithm_id=algorithm_id,
            extra_data=extra_data,
            level=logging.INFO if success else logging.ERROR
        )

    def log_algorithm_execution(self, user_id: str, algorithm_id: str, version_id: str,
                                input_data: Dict[str, Any], output_data: Dict[str, Any],
                                execution_time: float, success: bool,
                                error_msg: Optional[str] = None) -> None:
        """Log an algorithm execution with summarized (privacy-safe) payloads."""
        extra_data = {
            "version_id": version_id,
            "execution_time": execution_time,
            "input_summary": self._summarize_data(input_data),
            "output_summary": self._summarize_data(output_data),
            "success": success
        }

        if error_msg:
            extra_data["error"] = error_msg

        self.log_event(
            event_type="algorithm_execution",
            user_id=user_id,
            algorithm_id=algorithm_id,
            extra_data=extra_data,
            level=logging.INFO if success else logging.ERROR
        )

    def log_system_event(self, event_subtype: str, severity: str, message: str,
                         extra_data: Optional[Dict[str, Any]] = None) -> None:
        """Log a system event; ``severity`` selects the log level.

        Fixes vs. the original: the caller's ``extra_data`` dict is no longer
        mutated in place, and the ``message`` argument (previously accepted
        but silently dropped) is now recorded in the payload.
        """
        extra_data = dict(extra_data or {})  # copy: never mutate the caller's dict
        extra_data["subtype"] = event_subtype
        extra_data["severity"] = severity
        # Record the message without clobbering an explicit caller-provided one.
        extra_data.setdefault("message", message)

        self.log_event(
            event_type="system_event",
            extra_data=extra_data,
            level=self._severity_to_level(severity)
        )

    def _summarize_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Summarize a payload for logging, limiting size and exposure."""
        if not isinstance(data, dict):
            # str(x) always has a length, so the original hasattr() guard was dead code.
            return {"type": type(data).__name__, "size": len(str(data))}

        summary: Dict[str, Any] = {}
        for key, value in list(data.items())[:5]:  # summarize at most 5 items
            if isinstance(value, (dict, list)):
                summary[key] = f"<{type(value).__name__}>"
            else:
                summary[key] = str(value)[:100]  # truncate long scalar values
        return summary

    def _severity_to_level(self, severity: str) -> int:
        """Map a severity name (case-insensitive) to a logging level; default INFO."""
        severity_map = {
            "debug": logging.DEBUG,
            "info": logging.INFO,
            "warning": logging.WARNING,
            "error": logging.ERROR,
            "critical": logging.CRITICAL
        }
        return severity_map.get(severity.lower(), logging.INFO)
|
||
|
||
|
||
class LogQuery:
    """Search and aggregate helper over log files written by StructuredLogger."""

    # Single name -> numeric level table; the original duplicated this map
    # verbatim in both search_logs() and _matches_filters().
    _LEVEL_MAP = {
        "DEBUG": logging.DEBUG,
        "INFO": logging.INFO,
        "WARNING": logging.WARNING,
        "ERROR": logging.ERROR,
        "CRITICAL": logging.CRITICAL,
    }

    def __init__(self, log_dir: str = "logs"):
        # Directory containing the *.log files to search.
        self.log_dir = Path(log_dir)

    def search_logs(self,
                    start_date: Optional[datetime] = None,
                    end_date: Optional[datetime] = None,
                    event_types: Optional[List[str]] = None,
                    user_ids: Optional[List[str]] = None,
                    algorithm_ids: Optional[List[str]] = None,
                    log_levels: Optional[List[str]] = None,
                    limit: int = 100) -> List[Dict[str, Any]]:
        """Search log files and return up to ``limit`` matching entries.

        Args:
            start_date / end_date: Time window; entries whose timestamp
                cannot be parsed skip the time check (best-effort).
            event_types / user_ids / algorithm_ids: Allow-lists; ``None``
                disables the corresponding filter.
            log_levels: Level names ("INFO", ...); unknown names are ignored.
            limit: Maximum number of results returned.

        Returns:
            Parsed log entries as dicts, newest file first.
        """
        results: List[Dict[str, Any]] = []

        # Newest files first (rotation suffixes sort accordingly by name).
        log_files = sorted(self.log_dir.glob("*.log"), reverse=True)

        # Convert level names to numeric levels once, up front.
        level_filter: Optional[List[int]] = None
        if log_levels:
            level_filter = [self._LEVEL_MAP[name.upper()]
                            for name in log_levels if name.upper() in self._LEVEL_MAP]

        for log_file in log_files:
            try:
                with open(log_file, 'r', encoding='utf-8') as f:
                    for line in f:
                        try:
                            parsed_line = self._parse_log_line(line.strip())
                            if parsed_line and self._matches_filters(
                                parsed_line, start_date, end_date, event_types,
                                user_ids, algorithm_ids, level_filter
                            ):
                                results.append(parsed_line)

                                if len(results) >= limit:
                                    return results
                        except Exception:
                            # Skip lines that cannot be parsed.
                            continue

            except Exception as e:
                # Best-effort: a broken file must not abort the whole search.
                print(f"Error reading log file {log_file}: {e}")

        return results

    def _parse_log_line(self, line: str) -> Optional[Dict[str, Any]]:
        """Parse one ``time - name - level - message`` line; None if unparseable."""
        try:
            # Split on at most three " - " separators so JSON payloads
            # containing " - " stay intact in the message part.
            parts = line.split(" - ", 3)
            if len(parts) >= 4:
                timestamp_str = parts[0]
                logger_name = parts[1]
                level = parts[2]
                message = parts[3]

                if message.startswith('{') and message.endswith('}'):
                    # Structured entry: merge handler metadata into the payload.
                    log_data = json.loads(message)
                    log_data["timestamp"] = timestamp_str  # handler time wins over payload time
                    log_data["logger"] = logger_name
                    log_data["level"] = level
                    return log_data
                else:
                    # Plain-text entry: wrap it in a minimal structure.
                    return {
                        "timestamp": timestamp_str,
                        "logger": logger_name,
                        "level": level,
                        "message": message,
                        "raw_line": line
                    }
        except Exception:
            pass
        return None

    def _matches_filters(self, log_entry: Dict[str, Any],
                         start_date: Optional[datetime], end_date: Optional[datetime],
                         event_types: Optional[List[str]], user_ids: Optional[List[str]],
                         algorithm_ids: Optional[List[str]],
                         log_levels: Optional[List[int]]) -> bool:
        """Return True when the entry passes every active filter."""
        # Time-window check (skipped when the timestamp is unparseable).
        if start_date or end_date:
            try:
                entry_time = datetime.fromisoformat(
                    log_entry.get("timestamp", "").replace("Z", "+00:00"))
                if start_date and entry_time < start_date:
                    return False
                if end_date and entry_time > end_date:
                    return False
            except ValueError:
                pass  # malformed timestamp: do not reject on time

        # Identity / category allow-lists.
        if event_types and log_entry.get("event_type") not in event_types:
            return False
        if user_ids and log_entry.get("user_id") not in user_ids:
            return False
        if algorithm_ids and log_entry.get("algorithm_id") not in algorithm_ids:
            return False

        # Numeric log-level allow-list.
        if log_levels:
            entry_level = self._LEVEL_MAP.get(log_entry.get("level", "").upper())
            if entry_level not in log_levels:
                return False

        return True

    def get_log_stats(self, days: int = 7) -> Dict[str, Any]:
        """Aggregate counts over the last ``days`` days of logs.

        Returns a dict with totals broken down by event type, level and day,
        plus an ERROR/CRITICAL count. Subject to search_logs' default limit.
        """
        start_date = datetime.utcnow() - timedelta(days=days)
        logs = self.search_logs(start_date=start_date)

        stats: Dict[str, Any] = {
            "total_logs": len(logs),
            "by_event_type": {},
            "by_level": {},
            "by_day": {},
            "error_count": 0
        }

        for log in logs:
            # Count by event type.
            event_type = log.get("event_type", "unknown")
            stats["by_event_type"][event_type] = stats["by_event_type"].get(event_type, 0) + 1

            # Count by level.
            level = log.get("level", "UNKNOWN")
            stats["by_level"][level] = stats["by_level"].get(level, 0) + 1

            # Count by calendar day (timestamp prefix is YYYY-MM-DD).
            day = log.get("timestamp", "")[:10]
            if day:
                stats["by_day"][day] = stats["by_day"].get(day, 0) + 1

            # Count errors.
            if log.get("level") in ["ERROR", "CRITICAL"]:
                stats["error_count"] += 1

        return stats
|
||
|
||
|
||
# Module-level singleton instances shared across the app.
# NOTE: constructing StructuredLogger() here is an import-time side effect —
# it creates the default "logs" directory and attaches a file handler.
structured_logger = StructuredLogger()
log_query = LogQuery()