good version for 算法注册

This commit is contained in:
2026-02-15 21:23:28 +08:00
parent 3c03777b97
commit 62ea5d36a5
115 changed files with 9566 additions and 1576 deletions

View File

@@ -0,0 +1,16 @@
FROM python:3.9-slim
WORKDIR /app
# Install dependencies first so this layer is reused until requirements.txt changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy the application code
COPY . .
# Document the listening port: main.py binds to settings.PORT, whose default is 8003
EXPOSE 8003
# Start the service
CMD ["python", "main.py"]

View File

@@ -0,0 +1,89 @@
import base64
import logging
from io import BytesIO
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)


class SpeechToTextConverter:
    """Placeholder speech-to-text converter.

    No speech model is loaded. The "transcription" is a canned sentence picked
    from the size of the decoded audio and the requested language.
    """

    # Exclusive upper bounds, in decoded bytes, for "short" and "medium" audio.
    _SHORT_LIMIT = 10240  # 10 KB
    _MEDIUM_LIMIT = 102400  # 100 KB

    # Canned sentences ordered short / medium / long.
    _TEXTS_ZH = ("这是一段短音频", "这是一段中等长度的音频", "这是一段长音频")
    _TEXTS_EN = (
        "This is a short audio",
        "This is a medium length audio",
        "This is a long audio",
    )

    # Maximum number of characters of the input echoed back in each result.
    _PREVIEW_LENGTH = 100

    def __init__(self):
        """Create the converter; a real implementation would load a model here."""
        logger.info("初始化语音转文字转换器")

    def convert(
        self,
        audios: List[str],
        params: Optional[Dict[str, Any]] = None,
    ) -> List[Dict[str, Any]]:
        """Convert a batch of audio clips to text.

        Args:
            audios: Audio clips, each a base64-encoded string.
            params: Optional conversion parameters. Only ``"language"`` is
                read; it defaults to ``"zh"``.

        Returns:
            One dict per input with keys ``"audio"`` (the input, truncated to
            100 characters plus ``"..."`` when longer), ``"text"`` and
            ``"confidence"``. Items that cannot be decoded, including
            non-string items, yield an empty text with confidence 0.0.
        """
        if params is None:
            params = {}
        language = params.get("language", "zh")

        results = []
        for audio in audios:
            transcription = self._simple_convert(audio, language)
            results.append({
                "audio": self._preview(audio),
                "text": transcription["text"],
                "confidence": transcription["confidence"],
            })
        return results

    def _preview(self, audio: Any) -> str:
        """Return a short echo of *audio* for the result payload.

        Non-string items are rendered with ``str()`` so that a malformed
        request item produces a normal (empty) result instead of raising.
        """
        text = audio if isinstance(audio, str) else str(audio)
        if len(text) > self._PREVIEW_LENGTH:
            return text[:self._PREVIEW_LENGTH] + "..."
        return text

    def _simple_convert(self, audio_base64: str, language: str) -> Dict[str, Any]:
        """Produce a canned transcription based on the decoded audio size.

        Args:
            audio_base64: Base64-encoded audio.
            language: ``"en"`` selects the English sentences; any other value
                selects the Chinese ones.

        Returns:
            ``{"text": ..., "confidence": 0.85}`` on success, or an empty text
            with confidence 0.0 when the input cannot be base64-decoded.
        """
        try:
            audio_size = len(base64.b64decode(audio_base64))
        except (ValueError, TypeError) as exc:
            # binascii.Error (malformed base64) is a ValueError subclass;
            # TypeError covers inputs that are not str or bytes-like.
            logger.error("Speech to text conversion error: %s", exc)
            return {"text": "", "confidence": 0.0}

        # Equality test rather than a dict lookup so that an unhashable
        # "language" value still falls back to Chinese.
        texts = self._TEXTS_EN if language == "en" else self._TEXTS_ZH

        if audio_size < self._SHORT_LIMIT:
            text = texts[0]
        elif audio_size < self._MEDIUM_LIMIT:
            text = texts[1]
        else:
            text = texts[2]

        return {"text": text, "confidence": 0.85}

View File

@@ -0,0 +1,27 @@
from pydantic_settings import BaseSettings
from typing import Optional
class Settings(BaseSettings):
    """Service configuration.

    Each field has a default and can be overridden through pydantic-settings
    sources; a ``.env`` file is read when present and names are matched
    case-sensitively.
    """

    # Loading options. Declared through ``model_config`` because the nested
    # ``class Config`` form is deprecated in pydantic v2 and emits a warning.
    model_config = {"env_file": ".env", "case_sensitive": True}

    # Basic server settings
    HOST: str = "0.0.0.0"
    PORT: int = 8003
    DEBUG: bool = True

    # Service name
    SERVICE_NAME: str = "speech-to-text"

    # Logging
    LOG_LEVEL: str = "info"

    # Algorithm settings
    DEFAULT_LANGUAGE: str = "zh"


# Global settings instance shared by the service
settings = Settings()

View File

@@ -0,0 +1,108 @@
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
import uvicorn
import json
import logging
import base64
from io import BytesIO
# The service is launched as a plain script (`python main.py`) and served as
# "main:app", so there is no parent package and relative imports would fail
# with "attempted relative import with no known parent package". Keep the
# relative form only when the module really is imported as part of a package.
if __package__:
    from .ai_algorithm import SpeechToTextConverter
    from .config import settings
else:
    from ai_algorithm import SpeechToTextConverter
    from config import settings
# Configure logging. The level comes from settings.LOG_LEVEL; an unrecognised
# name falls back to INFO, which is what this module previously hard-coded.
_log_level = logging.getLevelName(str(settings.LOG_LEVEL).upper())
if not isinstance(_log_level, int):
    _log_level = logging.INFO
logging.basicConfig(
    level=_log_level,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Create the FastAPI application
app = FastAPI(
    title="语音转文字服务",
    description="提供语音转文字功能的AI服务",
    version="1.0.0"
)

# Single converter instance shared by all request handlers
converter = SpeechToTextConverter()
# Request model
class PredictRequest(BaseModel):
    """JSON body accepted by the POST /predict endpoint."""

    # Audio items to transcribe; forwarded unchanged to the converter.
    input_data: list

    # Optional conversion parameters; an empty dict when omitted.
    params: dict = {}
# Response model
class PredictResponse(BaseModel):
    """JSON body returned by the POST /predict endpoint."""

    # One result entry per submitted audio item.
    predictions: list

    # Outcome marker for the request.
    status: str
@app.post("/predict", response_model=PredictResponse)
async def predict(request: PredictRequest):
    """Transcribe a batch of base64-encoded audio items.

    Args:
        request: Body carrying ``input_data`` (the audio items) and optional
            ``params`` passed through to the converter.

    Returns:
        A ``PredictResponse`` with the converter output and status "success".

    Raises:
        HTTPException: 500 with the error text when conversion fails.
    """
    try:
        logger.info(
            "Received prediction request for %d audio files",
            len(request.input_data),
        )
        predictions = converter.convert(request.input_data, request.params)
        logger.info("Prediction completed: %s", predictions)
        return PredictResponse(
            predictions=predictions,
            status="success"
        )
    except Exception as exc:
        # logger.exception records the traceback, which a plain error() call loses.
        logger.exception("Prediction error: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
@app.post("/predict/file")
async def predict_file(file: UploadFile = File(...)):
    """Transcribe a single uploaded audio file.

    The upload is read fully into memory, base64-encoded and handed to the
    converter with default parameters.

    Args:
        file: The uploaded audio file.

    Returns:
        A dict with ``predictions``, ``status`` ("success") and the uploaded
        ``filename``.

    Raises:
        HTTPException: 500 with the error text when reading or conversion fails.
    """
    try:
        logger.info("Received file upload: %s", file.filename)
        contents = await file.read()
        # The converter expects base64 text, so encode the raw bytes first.
        audio_base64 = base64.b64encode(contents).decode('utf-8')
        predictions = converter.convert([audio_base64])
        logger.info("File prediction completed: %s", predictions)
        return {
            "predictions": predictions,
            "status": "success",
            "filename": file.filename
        }
    except Exception as exc:
        # logger.exception records the traceback, which a plain error() call loses.
        logger.exception("File prediction error: %s", exc)
        raise HTTPException(status_code=500, detail=str(exc)) from exc
@app.get("/health")
async def health_check():
    """Report a static health payload for liveness probes."""
    payload = {
        "status": "healthy",
        "service": "speech-to-text",
        "version": "1.0.0",
    }
    return payload
@app.get("/info")
async def service_info():
    """Describe the service and list its endpoints."""
    # Route path -> short description of the method and purpose.
    endpoints = {
        "/predict": "POST - 语音转文字预测",
        "/predict/file": "POST - 通过文件上传进行预测",
        "/health": "GET - 健康检查",
        "/info": "GET - 服务信息",
    }
    info = {
        "name": "语音转文字服务",
        "description": "提供语音转文字功能的AI服务",
        "version": "1.0.0",
        "endpoints": endpoints,
    }
    return info
if __name__ == "__main__":
    # Serve the app with uvicorn using the configured host and port;
    # auto-reload follows the DEBUG setting.
    run_options = {
        "host": settings.HOST,
        "port": settings.PORT,
        "reload": settings.DEBUG,
    }
    uvicorn.run("main:app", **run_options)

View File

@@ -0,0 +1,5 @@
fastapi==0.104.1
uvicorn==0.24.0.post1
pydantic==2.5.2
pydantic-settings==2.1.0
python-multipart==0.0.6

View File

@@ -0,0 +1,24 @@
#!/bin/bash
# Start the speech-to-text service.

# Stop at the first failing step (cd, venv creation, dependency install)
# instead of carrying on with a broken environment.
set -e

# Run from the directory that contains this script
cd "$(dirname "$0")"

# Create the virtual environment on first run
if [ ! -d "venv" ]; then
    echo "创建虚拟环境..."
    python3 -m venv venv
fi

# Activate the virtual environment
echo "激活虚拟环境..."
source venv/bin/activate

# Install dependencies
echo "安装依赖..."
pip install --no-cache-dir -r requirements.txt

# Start the service; exec replaces this shell so signals reach the Python process
echo "启动语音转文字服务..."
exec python main.py