good version for 算法注册
This commit is contained in:
89
services/speech-to-text/ai_algorithm.py
Normal file
89
services/speech-to-text/ai_algorithm.py
Normal file
@@ -0,0 +1,89 @@
|
||||
import base64
import logging
from io import BytesIO
from typing import Any, Dict, List, Optional
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SpeechToTextConverter:
    """Placeholder speech-to-text converter.

    No speech model is loaded. Each clip is base64-decoded and a canned
    sentence is picked from the size of the decoded payload, so the returned
    text describes the clip's length rather than its spoken content.
    """

    # Exclusive upper bounds, in decoded bytes, of the "short" and "medium"
    # buckets (10 KB and 100 KB). A payload at or above the last bound is
    # "long".
    _SIZE_BOUNDS = (10240, 102400)

    # Canned sentences ordered short / medium / long.
    _TEXT_ZH = ("这是一段短音频", "这是一段中等长度的音频", "这是一段长音频")
    _TEXT_EN = (
        "This is a short audio",
        "This is a medium length audio",
        "This is a long audio",
    )

    # Confidence reported for every successfully decoded clip.
    _CONFIDENCE = 0.85

    # Longest base64 prefix echoed back in a result before truncation.
    _PREVIEW_CHARS = 100

    def __init__(self):
        """Initialise the converter; only logs, no model is loaded."""
        logger.info("初始化语音转文字转换器")
        # A pretrained model could be loaded here; this example relies on
        # simple size-based rules instead.

    def convert(
        self,
        audios: List[str],
        params: Optional[Dict[str, Any]] = None,
    ) -> List[Dict[str, Any]]:
        """Convert a batch of base64-encoded audio clips to text.

        Args:
            audios: Audio clips, each a base64-encoded string.
            params: Optional conversion parameters. Only the ``"language"``
                key is read; it defaults to ``"zh"``.

        Returns:
            One dict per input clip, in input order, with the keys
            ``"audio"`` (the input, truncated to 100 characters plus
            ``"..."`` when longer), ``"text"`` and ``"confidence"``.
        """
        language = "zh" if params is None else params.get("language", "zh")

        results = []
        for audio_base64 in audios:
            transcription = self._simple_convert(audio_base64, language)
            results.append(
                {
                    "audio": self._preview(audio_base64),
                    "text": transcription["text"],
                    "confidence": transcription["confidence"],
                }
            )
        return results

    @classmethod
    def _preview(cls, audio_base64: str) -> str:
        """Return the input shortened for echoing back in a result."""
        if len(audio_base64) > cls._PREVIEW_CHARS:
            return audio_base64[:cls._PREVIEW_CHARS] + "..."
        return audio_base64

    def _simple_convert(self, audio_base64: str, language: str) -> Dict[str, Any]:
        """Produce a canned transcription based on the decoded audio size.

        Args:
            audio_base64: Base64-encoded audio.
            language: ``"en"`` selects the English sentences; any other
                value selects the Chinese ones.

        Returns:
            A dict with ``"text"`` and ``"confidence"``. When the input
            cannot be base64-decoded, the error is logged and the result is
            ``{"text": "", "confidence": 0.0}``.
        """
        # Decoding is the only step that can fail, so it alone is guarded.
        # binascii.Error (bad padding / alphabet) is a ValueError subclass;
        # TypeError covers inputs that are neither str nor bytes-like.
        try:
            audio_data = base64.b64decode(audio_base64)
        except (ValueError, TypeError) as exc:
            logger.error("Speech to text conversion error: %s", exc)
            return {"text": "", "confidence": 0.0}

        # Bucket index = number of bounds the payload has reached:
        # 0 -> short, 1 -> medium, 2 -> long.
        audio_size = len(audio_data)
        bucket = sum(audio_size >= bound for bound in self._SIZE_BOUNDS)

        sentences = self._TEXT_EN if language == "en" else self._TEXT_ZH
        return {"text": sentences[bucket], "confidence": self._CONFIDENCE}
|
||||
Reference in New Issue
Block a user