good version for 算法注册
This commit is contained in:
16
services/speech-to-text/Dockerfile
Normal file
16
services/speech-to-text/Dockerfile
Normal file
@@ -0,0 +1,16 @@
|
||||
FROM python:3.9-slim

WORKDIR /app

# Install dependencies before copying the sources so this layer can be
# reused from the build cache when only application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code
COPY . .

# Document the port the service listens on. config.py defaults PORT to 8003,
# so the previously exposed 8000 did not match the running server.
EXPOSE 8003

# Start the service
CMD ["python", "main.py"]
|
||||
89
services/speech-to-text/ai_algorithm.py
Normal file
89
services/speech-to-text/ai_algorithm.py
Normal file
@@ -0,0 +1,89 @@
|
||||
import logging
|
||||
import base64
|
||||
from io import BytesIO
|
||||
from typing import List, Dict, Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SpeechToTextConverter:
    """Placeholder speech-to-text converter.

    No speech model is loaded: the "transcript" is a canned sentence chosen
    from the size of the decoded audio payload and the requested language.
    The class exists so a real model can later be dropped in behind the same
    ``convert`` interface.
    """

    # Decoded payloads strictly smaller than this many bytes count as "short".
    SHORT_AUDIO_MAX_BYTES = 10240
    # Decoded payloads strictly smaller than this (and not short) are "medium".
    MEDIUM_AUDIO_MAX_BYTES = 102400
    # Number of input characters echoed back in the "audio" field of a result.
    PREVIEW_CHARS = 100
    # Fixed confidence reported for every successfully decoded payload.
    SUCCESS_CONFIDENCE = 0.85

    # Canned transcripts indexed by size bucket: (short, medium, long).
    _ZH_TEXTS = ("这是一段短音频", "这是一段中等长度的音频", "这是一段长音频")
    _EN_TEXTS = (
        "This is a short audio",
        "This is a medium length audio",
        "This is a long audio",
    )

    def __init__(self):
        """Create the converter; a pretrained model could be loaded here."""
        logger.info("初始化语音转文字转换器")

    def convert(self, audios: List[str], params: Dict[str, Any] = None) -> List[Dict[str, Any]]:
        """Convert a batch of base64-encoded audio clips to text.

        Args:
            audios: Audio clips, each a base64-encoded string.
            params: Optional conversion options. Only ``"language"`` is read
                (default ``"zh"``); ``"en"`` selects English output, any other
                value selects Chinese.

        Returns:
            One dict per input, in input order, with keys ``"audio"`` (the
            input, truncated to ``PREVIEW_CHARS`` characters plus ``"..."``
            when longer), ``"text"`` and ``"confidence"``. An item that cannot
            be decoded yields ``text == ""`` and ``confidence == 0.0`` instead
            of failing the whole batch.
        """
        language = (params or {}).get("language", "zh")

        results = []
        for audio_base64 in audios:
            transcription = self._simple_convert(audio_base64, language)
            results.append({
                "audio": self._preview(audio_base64),
                "text": transcription["text"],
                "confidence": transcription["confidence"],
            })
        return results

    @classmethod
    def _preview(cls, audio_base64: Any) -> str:
        """Return a short, printable echo of one input item.

        Non-string items (which ``_simple_convert`` already rejects gracefully)
        are rendered with ``str()`` so building the preview can never raise
        and abort the batch.
        """
        text = audio_base64 if isinstance(audio_base64, str) else str(audio_base64)
        if len(text) > cls.PREVIEW_CHARS:
            return text[:cls.PREVIEW_CHARS] + "..."
        return text

    def _simple_convert(self, audio_base64: str, language: str) -> Dict[str, Any]:
        """Produce the canned transcript for a single clip.

        Args:
            audio_base64: Base64-encoded audio. Whitespace (e.g. line wrapping)
                inside a string is ignored.
            language: ``"en"`` for English output, anything else for Chinese.

        Returns:
            ``{"text": ..., "confidence": ...}``. When the payload is not valid
            base64 (or is not a string/bytes-like object) the error is logged
            and ``{"text": "", "confidence": 0.0}`` is returned.
        """
        try:
            if isinstance(audio_base64, str):
                # Tolerate wrapped base64, then validate strictly below.
                audio_base64 = "".join(audio_base64.split())
            # validate=True: without it, characters outside the base64
            # alphabet are silently discarded and garbage input would still
            # receive a confident transcript.
            audio_data = base64.b64decode(audio_base64, validate=True)
        except (ValueError, TypeError) as e:
            # binascii.Error (bad alphabet/padding) is a ValueError subclass;
            # TypeError covers non-string, non-bytes items.
            logger.error("Speech to text conversion error: %s", e)
            return {
                "text": "",
                "confidence": 0.0,
            }

        audio_size = len(audio_data)
        if audio_size < self.SHORT_AUDIO_MAX_BYTES:
            bucket = 0
        elif audio_size < self.MEDIUM_AUDIO_MAX_BYTES:
            bucket = 1
        else:
            bucket = 2

        texts = self._EN_TEXTS if language == "en" else self._ZH_TEXTS
        return {
            "text": texts[bucket],
            "confidence": self.SUCCESS_CONFIDENCE,
        }
|
||||
27
services/speech-to-text/config.py
Normal file
27
services/speech-to-text/config.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from pydantic_settings import BaseSettings
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Service configuration.

    Every field below has a default. The model is configured to also read a
    ``.env`` file and to match setting names case-sensitively.
    """

    # pydantic v2 style configuration. The nested ``class Config`` form is
    # deprecated in pydantic 2.x; ``model_config`` carries the same two
    # options (a plain dict is accepted in place of SettingsConfigDict).
    model_config = {
        "env_file": ".env",
        "case_sensitive": True,
    }

    # Basic server settings
    HOST: str = "0.0.0.0"
    PORT: int = 8003
    DEBUG: bool = True

    # Service name
    SERVICE_NAME: str = "speech-to-text"

    # Logging
    LOG_LEVEL: str = "info"

    # Algorithm settings
    DEFAULT_LANGUAGE: str = "zh"


# Global settings instance, created once at import time
settings = Settings()
|
||||
108
services/speech-to-text/main.py
Normal file
108
services/speech-to-text/main.py
Normal file
@@ -0,0 +1,108 @@
|
||||
import base64
import json
import logging
from io import BytesIO

import uvicorn
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel

try:
    # Package-relative imports: used when this module is loaded as part of a package.
    from .ai_algorithm import SpeechToTextConverter
    from .config import settings
except ImportError:
    # Run as a script (`python main.py`) or loaded as top-level `main` by
    # uvicorn ("main:app"): there is no parent package, so the relative
    # imports above fail and the sibling modules are imported directly.
    from ai_algorithm import SpeechToTextConverter
    from config import settings
|
||||
|
||||
# Configure logging.
# Honour settings.LOG_LEVEL (previously declared in the config but ignored).
# getLevelName maps a known level name to its numeric value and returns a
# string for unknown names, in which case we keep the former INFO default.
_log_level = logging.getLevelName(str(settings.LOG_LEVEL).upper())
logging.basicConfig(
    level=_log_level if isinstance(_log_level, int) else logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)
|
||||
|
||||
# Build the FastAPI application from its descriptive metadata
_APP_METADATA = {
    "title": "语音转文字服务",
    "description": "提供语音转文字功能的AI服务",
    "version": "1.0.0",
}
app = FastAPI(**_APP_METADATA)

# Single module-level converter used by the request handlers
converter = SpeechToTextConverter()
|
||||
|
||||
# Request schema
class PredictRequest(BaseModel):
    """JSON body accepted by ``POST /predict``."""

    # Items to transcribe; the handler passes this list straight to
    # ``converter.convert``, which decodes each item as base64.
    input_data: list
    # Conversion options forwarded to the converter; defaults to no options.
    params: dict = {}
|
||||
|
||||
# Response schema
class PredictResponse(BaseModel):
    """JSON body returned by ``POST /predict``."""

    # Conversion results as returned by the converter.
    predictions: list
    # Outcome marker; the handler sets it to "success".
    status: str
|
||||
|
||||
@app.post("/predict", response_model=PredictResponse)
async def predict(request: PredictRequest):
    """Run speech-to-text on a batch of base64-encoded audio clips.

    Args:
        request: Body carrying ``input_data`` (the clips) and optional
            ``params`` forwarded to the converter.

    Returns:
        A ``PredictResponse`` with the converter's results and
        ``status="success"``.

    Raises:
        HTTPException: 500 with the error text when conversion fails.
    """
    logger.info(
        "Received prediction request for %d audio files", len(request.input_data)
    )

    # Use the configured default language unless the caller supplied one;
    # settings.DEFAULT_LANGUAGE was previously declared but never consulted.
    params = {"language": settings.DEFAULT_LANGUAGE, **request.params}

    try:
        predictions = converter.convert(request.input_data, params)
    except Exception as e:
        # Top-level request boundary: log with traceback, then report as 500.
        logger.exception("Prediction error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e

    logger.info("Prediction completed: %s", predictions)
    return PredictResponse(
        predictions=predictions,
        status="success",
    )
|
||||
|
||||
@app.post("/predict/file")
async def predict_file(file: UploadFile = File(...)):
    """Run speech-to-text on a single uploaded audio file.

    Args:
        file: The uploaded file; its raw bytes are base64-encoded and handed
            to the converter as a one-item batch.

    Returns:
        A dict with ``predictions``, ``status`` ("success") and the uploaded
        ``filename``.

    Raises:
        HTTPException: 500 with the error text when reading or converting
            the upload fails.
    """
    logger.info("Received file upload: %s", file.filename)

    try:
        # Read the whole upload into memory
        contents = await file.read()

        # The converter expects base64 text, so encode the raw bytes
        audio_base64 = base64.b64encode(contents).decode('utf-8')

        # Convert using the configured default language (this endpoint
        # accepts no per-request options).
        predictions = converter.convert(
            [audio_base64], {"language": settings.DEFAULT_LANGUAGE}
        )
    except Exception as e:
        # Top-level request boundary: log with traceback, then report as 500.
        logger.exception("File prediction error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e

    logger.info("File prediction completed: %s", predictions)
    return {
        "predictions": predictions,
        "status": "success",
        "filename": file.filename,
    }
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Health-check endpoint.

    Returns:
        A dict with a fixed ``"healthy"`` status, the configured service name
        and the application version.
    """
    return {
        "status": "healthy",
        # Read from configuration / app metadata instead of repeating the
        # literals, so the report cannot drift from the real values.
        "service": settings.SERVICE_NAME,
        "version": app.version,
    }
|
||||
|
||||
@app.get("/info")
async def service_info():
    """Service information endpoint.

    Returns:
        A dict with the service name, description and version (taken from the
        FastAPI application metadata) plus a short description of each
        endpoint.
    """
    return {
        # Reuse the metadata given to FastAPI(...) rather than duplicating it.
        "name": app.title,
        "description": app.description,
        "version": app.version,
        "endpoints": {
            "/predict": "POST - 语音转文字预测",
            "/predict/file": "POST - 通过文件上传进行预测",
            "/health": "GET - 健康检查",
            "/info": "GET - 服务信息",
        },
    }
|
||||
|
||||
if __name__ == "__main__":
    # Start the server from configuration. The app is passed as an import
    # string ("main:app") because uvicorn requires that form for reload.
    uvicorn.run(
        "main:app",
        host=settings.HOST,
        port=settings.PORT,
        reload=settings.DEBUG,
        # Forward the configured log level (default "info") to uvicorn so the
        # LOG_LEVEL setting takes effect; uvicorn expects lowercase names.
        log_level=settings.LOG_LEVEL.lower(),
    )
|
||||
5
services/speech-to-text/requirements.txt
Normal file
5
services/speech-to-text/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
fastapi==0.104.1
|
||||
uvicorn==0.24.0.post1
|
||||
pydantic==2.5.2
|
||||
pydantic-settings==2.1.0
|
||||
python-multipart==0.0.6
|
||||
24
services/speech-to-text/start.sh
Normal file
24
services/speech-to-text/start.sh
Normal file
@@ -0,0 +1,24 @@
|
||||
#!/bin/bash

# Start the speech-to-text service.

# Stop at the first failing step (cd, venv creation, activation, pip install)
# instead of carrying on and launching the service in a broken environment.
set -eo pipefail

# Work from the directory that contains this script
cd "$(dirname "$0")"

# Create the virtual environment on first run
if [ ! -d "venv" ]; then
    echo "创建虚拟环境..."
    python3 -m venv venv
fi

# Activate the virtual environment
echo "激活虚拟环境..."
source venv/bin/activate

# Install dependencies
echo "安装依赖..."
pip install --no-cache-dir -r requirements.txt

# Start the service; exec replaces this shell so signals reach Python directly
echo "启动语音转文字服务..."
exec python main.py
|
||||
Reference in New Issue
Block a user