good version for 算法注册

2026-02-15 21:23:28 +08:00
parent 3c03777b97
commit 62ea5d36a5
115 changed files with 9566 additions and 1576 deletions
--- a/services/speech-to-text/ai_algorithm.py
+++ b/services/speech-to-text/ai_algorithm.py
@@ -0,0 +1,89 @@
+import base64
+import logging
+from io import BytesIO
+from typing import Any, Dict, List, Optional
+
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+
+
+class SpeechToTextConverter:
+    """Placeholder speech-to-text converter.
+
+    No speech model is loaded. The "transcription" is a fixed sentence
+    chosen from the size of the decoded audio payload and the requested
+    language, reported with a constant confidence value.
+    """
+
+    # Exclusive upper bounds, in bytes of decoded audio, of the size classes.
+    _SHORT_MAX_BYTES = 10240  # 10 KB
+    _MEDIUM_MAX_BYTES = 102400  # 100 KB
+
+    # Placeholder sentences ordered (short, medium, long).
+    _ZH_TEXTS = (
+        "这是一段短音频",
+        "这是一段中等长度的音频",
+        "这是一段长音频",
+    )
+    _EN_TEXTS = (
+        "This is a short audio",
+        "This is a medium length audio",
+        "This is a long audio",
+    )
+
+    # Confidence reported for every payload that decodes successfully.
+    _CONFIDENCE = 0.85
+
+    # Longest base64 prefix echoed back in a result before truncation.
+    _PREVIEW_CHARS = 100
+
+    def __init__(self):
+        """Log that the converter was created; no model or state is set up."""
+        logger.info("初始化语音转文字转换器")
+        # A pretrained model could be loaded here; this example relies on
+        # simple size-based rules instead.
+
+    def convert(
+        self,
+        audios: List[str],
+        params: Optional[Dict[str, Any]] = None,
+    ) -> List[Dict[str, Any]]:
+        """Convert a batch of base64-encoded audio clips to text.
+
+        Args:
+            audios: Audio clips, each one a base64-encoded string.
+            params: Optional conversion parameters. Only ``"language"`` is
+                read; it defaults to ``"zh"``.
+
+        Returns:
+            One dict per input clip, in input order, with the keys
+            ``"audio"`` (the base64 input, truncated to 100 characters plus
+            ``"..."`` when longer), ``"text"`` and ``"confidence"``.
+        """
+        options = {} if params is None else params
+        language = options.get("language", "zh")
+
+        results = []
+        for audio_base64 in audios:
+            transcription = self._simple_convert(audio_base64, language)
+            results.append({
+                "audio": self._preview(audio_base64),
+                "text": transcription["text"],
+                "confidence": transcription["confidence"],
+            })
+        return results
+
+    def _preview(self, audio_base64: str) -> str:
+        """Return the input unchanged, or its first 100 characters plus "..."."""
+        if len(audio_base64) > self._PREVIEW_CHARS:
+            return audio_base64[:self._PREVIEW_CHARS] + "..."
+        return audio_base64
+
+    def _simple_convert(self, audio_base64: str, language: str) -> Dict[str, Any]:
+        """Produce the rule-based placeholder transcription for one clip.
+
+        Args:
+            audio_base64: Base64-encoded audio.
+            language: ``"en"`` selects the English sentences; every other
+                value selects the Chinese ones.
+
+        Returns:
+            ``{"text": ..., "confidence": 0.85}`` when the payload decodes,
+            or ``{"text": "", "confidence": 0.0}`` when decoding raises (the
+            error is logged, not propagated).
+        """
+        # Only the decode can fail, so it is the only statement guarded.
+        # NOTE: b64decode runs with its default validate=False, so characters
+        # outside the base64 alphabet are ignored rather than rejected.
+        try:
+            audio_size = len(base64.b64decode(audio_base64))
+        except Exception as e:
+            logger.error("Speech to text conversion error: %s", e)
+            return {"text": "", "confidence": 0.0}
+
+        # Classify the decoded size once; the class indexes both sentence tables.
+        if audio_size < self._SHORT_MAX_BYTES:
+            size_class = 0
+        elif audio_size < self._MEDIUM_MAX_BYTES:
+            size_class = 1
+        else:
+            size_class = 2
+
+        texts = self._EN_TEXTS if language == "en" else self._ZH_TEXTS
+        return {"text": texts[size_class], "confidence": self._CONFIDENCE}