good version for 算法注册

2026-02-15 21:23:28 +08:00
parent 3c03777b97
commit 62ea5d36a5
115 changed files with 9566 additions and 1576 deletions
--- a/services/speech-to-text/Dockerfile
+++ b/services/speech-to-text/Dockerfile
@@ -0,0 +1,16 @@
+FROM python:3.9-slim
+
+WORKDIR /app
+
+# Install dependencies before copying the code so this layer is reused until requirements.txt changes
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the service code
+COPY . .
+
+# Document the listening port; it must match the PORT default (8003) in config.py
+EXPOSE 8003
+
+# Start the service
+CMD ["python", "main.py"]
--- a/services/speech-to-text/ai_algorithm.py
+++ b/services/speech-to-text/ai_algorithm.py
@@ -0,0 +1,89 @@
+import logging
+import base64
+from io import BytesIO
+from typing import List, Dict, Any
+
+# Module-level logger named after this module
+logger = logging.getLogger(__name__)
+
+
+class SpeechToTextConverter:
+    """Placeholder speech-to-text converter.
+
+    No speech model is involved: each audio payload is base64-decoded and a
+    canned sentence is chosen from the decoded size and the requested
+    language.
+    """
+
+    # (exclusive size limit in bytes, Chinese text, English text), ascending.
+    _SIZE_BUCKETS = (
+        (10240, "这是一段短音频", "This is a short audio"),
+        (102400, "这是一段中等长度的音频", "This is a medium length audio"),
+    )
+    # (Chinese text, English text) for payloads of 102400 bytes or more.
+    _LONG_AUDIO_TEXTS = ("这是一段长音频", "This is a long audio")
+
+    def __init__(self):
+        """Log that the converter was created; no model is loaded."""
+        logger.info("初始化语音转文字转换器")
+
+    def convert(self, audios: List[str], params: Dict[str, Any] = None) -> List[Dict[str, Any]]:
+        """Convert a batch of base64-encoded audio payloads to text.
+
+        Args:
+            audios: Audio payloads, each a base64-encoded string.
+            params: Optional settings. Only ``"language"`` is read; it
+                defaults to ``"zh"``.
+
+        Returns:
+            One dict per payload, in input order, with the keys ``"audio"``
+            (the payload, cut to 100 characters plus ``"..."`` when longer),
+            ``"text"`` and ``"confidence"``.
+        """
+        options = {} if params is None else params
+        language = options.get("language", "zh")
+
+        results = []
+        for payload in audios:
+            outcome = self._simple_convert(payload, language)
+            # Echo a short preview rather than the whole payload.
+            preview = payload[:100] + "..." if len(payload) > 100 else payload
+            results.append({
+                "audio": preview,
+                "text": outcome["text"],
+                "confidence": outcome["confidence"],
+            })
+        return results
+
+    def _simple_convert(self, audio_base64: str, language: str) -> Dict[str, Any]:
+        """Pick a canned transcription from the decoded size of one payload.
+
+        Args:
+            audio_base64: Base64-encoded audio. Whitespace inside a string
+                payload (for example line-wrapped base64) is ignored.
+            language: ``"en"`` selects the English texts; any other value
+                selects the Chinese ones.
+
+        Returns:
+            ``{"text": <sentence>, "confidence": 0.85}`` on success, or
+            ``{"text": "", "confidence": 0.0}`` when the payload cannot be
+            decoded as base64.
+        """
+        try:
+            if isinstance(audio_base64, str):
+                # Tolerate line-wrapped base64 before strict validation.
+                audio_base64 = "".join(audio_base64.split())
+            # validate=True rejects characters outside the base64 alphabet.
+            # The default mode silently drops them, so arbitrary text may
+            # decode to "audio" and be reported with normal confidence.
+            audio_size = len(base64.b64decode(audio_base64, validate=True))
+        except (ValueError, TypeError) as e:
+            # binascii.Error is a ValueError; TypeError covers non-string items.
+            logger.error(f"Speech to text conversion error: {str(e)}")
+            return {"text": "", "confidence": 0.0}
+
+        # Start from the "long" texts and replace them with the first bucket
+        # whose limit the payload stays under.
+        zh_text, en_text = self._LONG_AUDIO_TEXTS
+        for limit, zh_candidate, en_candidate in self._SIZE_BUCKETS:
+            if audio_size < limit:
+                zh_text, en_text = zh_candidate, en_candidate
+                break
+
+        return {
+            "text": en_text if language == "en" else zh_text,
+            "confidence": 0.85,
+        }
--- a/services/speech-to-text/config.py
+++ b/services/speech-to-text/config.py
@@ -0,0 +1,27 @@
+from pydantic_settings import BaseSettings
+from typing import Optional
+
+
+class Settings(BaseSettings):
+    """Service configuration, overridable through environment variables or a ``.env`` file."""
+
+    # pydantic v2 configuration; replaces the deprecated inner ``class Config``.
+    # Values are also read from ``.env`` and variable names are matched
+    # case-sensitively.
+    model_config = {"env_file": ".env", "case_sensitive": True}
+
+    # Basic service settings
+    HOST: str = "0.0.0.0"
+    PORT: int = 8003
+    DEBUG: bool = True
+
+    # Service name
+    SERVICE_NAME: str = "speech-to-text"
+
+    # Logging
+    LOG_LEVEL: str = "info"
+
+    # Algorithm settings
+    DEFAULT_LANGUAGE: str = "zh"
+
+
+# Global settings instance, created once when this module is imported
+settings = Settings()
--- a/services/speech-to-text/main.py
+++ b/services/speech-to-text/main.py
@@ -0,0 +1,108 @@
+import base64
+import json
+import logging
+from io import BytesIO
+
+import uvicorn
+from fastapi import FastAPI, File, HTTPException, UploadFile
+from pydantic import BaseModel
+
+# Absolute imports: this file is run as a script (`python main.py`) and loaded
+# by uvicorn as the top-level module "main", so it has no parent package and
+# relative imports (`from .config import ...`) would raise ImportError.
+from ai_algorithm import SpeechToTextConverter
+from config import settings
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Create the FastAPI application
+app = FastAPI(
+    title="语音转文字服务",
+    description="提供语音转文字功能的AI服务",
+    version="1.0.0"
+)
+
+# Create the converter used by the prediction endpoints
+converter = SpeechToTextConverter()
+
+# Request body for POST /predict
+class PredictRequest(BaseModel):
+    # Base64-encoded audio payloads. Typed as strings so that non-string items
+    # are rejected by request validation instead of failing inside the
+    # converter with a 500. Built-in generics require Python 3.9+, which is
+    # the version the Dockerfile uses.
+    input_data: list[str]
+    # Optional conversion parameters; the converter reads "language".
+    params: dict = {}
+
+# Response body for POST /predict
+class PredictResponse(BaseModel):
+    # Per-audio result dicts as returned by the converter
+    predictions: list
+    # Set to "success" by the /predict handler
+    status: str
+
+@app.post("/predict", response_model=PredictResponse)
+async def predict(request: PredictRequest):
+    """算法预测接口"""
+    # Prediction endpoint: runs the converter over request.input_data with
+    # request.params and wraps the result in a PredictResponse. Any failure is
+    # logged and reported as HTTP 500 with the error text as detail.
+    # The docstring above is kept verbatim because FastAPI publishes it as the
+    # operation description in the generated API docs.
+    audio_items = request.input_data
+    try:
+        logger.info(f"Received prediction request for {len(audio_items)} audio files")
+        outcome = converter.convert(audio_items, request.params)
+        logger.info(f"Prediction completed: {outcome}")
+        response = PredictResponse(status="success", predictions=outcome)
+    except Exception as exc:
+        message = str(exc)
+        logger.error(f"Prediction error: {message}")
+        raise HTTPException(status_code=500, detail=message)
+    return response
+
+@app.post("/predict/file")
+async def predict_file(file: UploadFile = File(...), language: str = settings.DEFAULT_LANGUAGE):
+    """通过文件上传进行预测"""
+    # Transcribe one uploaded audio file.
+    #
+    # Args:
+    #   file: the uploaded audio file (multipart form field "file").
+    #   language: optional query parameter; defaults to
+    #     settings.DEFAULT_LANGUAGE so that the configured default is honoured.
+    # Returns:
+    #   dict with "predictions" (converter output), "status" and "filename".
+    # Raises:
+    #   HTTPException(500) with the error text when anything fails.
+    #
+    # The docstring above is kept verbatim because FastAPI publishes it as the
+    # operation description in the generated API docs.
+    try:
+        logger.info(f"Received file upload: {file.filename}")
+
+        # Read the whole upload into memory
+        contents = await file.read()
+
+        # The converter takes base64 text, so encode the raw bytes
+        audio_base64 = base64.b64encode(contents).decode('utf-8')
+
+        # Run the converter on the single payload
+        predictions = converter.convert([audio_base64], {"language": language})
+
+        logger.info(f"File prediction completed: {predictions}")
+        return {
+            "predictions": predictions,
+            "status": "success",
+            "filename": file.filename
+        }
+    except Exception as e:
+        logger.error(f"File prediction error: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/health")
+async def health_check():
+    """健康检查接口"""
+    # Health-check endpoint returning a fixed status payload.
+    # The docstring above is kept verbatim because FastAPI publishes it as the
+    # operation description in the generated API docs.
+    payload = dict(
+        status="healthy",
+        service="speech-to-text",
+        version="1.0.0",
+    )
+    return payload
+
+@app.get("/info")
+async def service_info():
+    """服务信息接口"""
+    # Service-information endpoint: returns the service name, description,
+    # version and a map of route -> short description.
+    # The docstring above is kept verbatim because FastAPI publishes it as the
+    # operation description in the generated API docs.
+    routes = [
+        ("/predict", "POST - 语音转文字预测"),
+        ("/predict/file", "POST - 通过文件上传进行预测"),
+        ("/health", "GET - 健康检查"),
+        ("/info", "GET - 服务信息"),
+    ]
+    info = {
+        "name": "语音转文字服务",
+        "description": "提供语音转文字功能的AI服务",
+        "version": "1.0.0",
+    }
+    info["endpoints"] = dict(routes)
+    return info
+
+# Script entry point: start uvicorn for the app in this module, taking host,
+# port and the auto-reload flag from settings.
+if __name__ == "__main__":
+    uvicorn.run(
+        "main:app",
+        host=settings.HOST,
+        port=settings.PORT,
+        reload=settings.DEBUG
+    )
--- a/services/speech-to-text/requirements.txt
+++ b/services/speech-to-text/requirements.txt
@@ -0,0 +1,5 @@
+fastapi==0.104.1
+uvicorn==0.24.0.post1
+pydantic==2.5.2
+pydantic-settings==2.1.0
+python-multipart==0.0.6
--- a/services/speech-to-text/start.sh
+++ b/services/speech-to-text/start.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Start the speech-to-text service
+
+# Abort on the first failing step so the service is never started from the
+# wrong directory, without a virtual environment, or with missing dependencies.
+set -e
+
+# Change to the directory containing this script
+cd "$(dirname "$0")"
+
+# Create the virtual environment if it does not exist yet
+if [ ! -d "venv" ]; then
+    echo "创建虚拟环境..."
+    python3 -m venv venv
+fi
+
+# Activate the virtual environment
+echo "激活虚拟环境..."
+source venv/bin/activate
+
+# Install dependencies
+echo "安装依赖..."
+pip install --no-cache-dir -r requirements.txt
+
+# Start the service; exec replaces the shell so signals reach Python directly
+echo "启动语音转文字服务..."
+exec python main.py