first commit

2026-03-21 09:12:47 +08:00
commit a1e76157c9
80 changed files with 506309 additions and 0 deletions
--- a/translate_aliyun.py
+++ b/translate_aliyun.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+"""
+使用阿里百炼API翻译题目内容
+"""
+import json
+import os
+import time
+import urllib.request
+import urllib.parse
+import ssl
+
+# SECURITY: credentials must not be committed to source control. The key is
+# read from the DASHSCOPE_API_KEY environment variable; any key that was ever
+# hard-coded in this file should be treated as leaked and revoked.
+# An empty string is used when the variable is unset, in which case requests
+# are sent with an empty bearer token.
+API_KEY = os.environ.get("DASHSCOPE_API_KEY", "")
+# DashScope text-generation endpoint that translation requests are POSTed to.
+API_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"
+
+def translate_text(text):
+    """
+    Translate English text to Chinese through the DashScope generation API.
+
+    Args:
+        text: English source text. Falsy or whitespace-only values are
+            returned unchanged without any network request.
+
+    Returns:
+        The translated text with surrounding whitespace stripped. This is a
+        best-effort call: if the request or response handling raises, or the
+        response has no ``output.text`` field, a message is printed and the
+        original ``text`` is returned, so callers cannot tell a failed call
+        from a successful one by the return value alone.
+    """
+    if not text or not text.strip():
+        return text
+
+    prompt = f"""请将以下英文翻译成中文，保持专业术语的准确性，直接输出翻译结果，不要添加任何解释：
+
+英文原文：
+{text}
+
+中文翻译："""
+
+    # The prompt ends with this label; strip it in case the model echoes it
+    # back at the start of its answer.
+    echoed_label = '中文翻译：'
+
+    try:
+        headers = {
+            'Content-Type': 'application/json',
+            'Authorization': f'Bearer {API_KEY}'
+        }
+
+        data = {
+            "model": "qwen-turbo",
+            "input": {
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": prompt
+                    }
+                ]
+            },
+            "parameters": {
+                "temperature": 0.1,
+                "max_tokens": 2000
+            }
+        }
+
+        req = urllib.request.Request(
+            API_URL,
+            data=json.dumps(data).encode('utf-8'),
+            headers=headers,
+            method='POST'
+        )
+
+        ssl_context = ssl.create_default_context()
+
+        with urllib.request.urlopen(req, context=ssl_context, timeout=60) as response:
+            result = json.loads(response.read().decode('utf-8'))
+
+            if result and 'output' in result and 'text' in result['output']:
+                translated = result['output']['text'].strip()
+                if translated.startswith(echoed_label):
+                    # Slice by the label's real length. The label is 5
+                    # characters long, so a fixed [6:] slice would also drop
+                    # the first character of the translation.
+                    translated = translated[len(echoed_label):].strip()
+                return translated
+            else:
+                print(f"API响应格式错误: {result}")
+                return text
+
+    except Exception as e:
+        # Deliberately broad: any network, HTTP, JSON or response-shape error
+        # falls back to the untranslated text instead of aborting a batch.
+        print(f"翻译错误: {e}")
+        return text
+
+def _needs_translation(entry, key):
+    """Return True when ``entry[key]`` is missing, empty, or the placeholder."""
+    value = entry.get(key)
+    return not value or value == '待翻译...'
+
+
+def translate_questions(input_file, output_file, start_index=0, batch_size=10):
+    """
+    Translate one batch of questions and checkpoint progress after each one.
+
+    If ``output_file`` already exists it is loaded instead of ``input_file``,
+    so fields translated by earlier runs are kept and skipped.
+
+    Args:
+        input_file: Path to the JSON list of untranslated questions.
+        output_file: Path of the JSON file that receives the results; it also
+            serves as the resume checkpoint for later runs.
+        start_index: Zero-based index of the first question in this batch.
+        batch_size: Maximum number of questions to process in this call.
+
+    Returns:
+        The full list of question dicts, including those outside the batch.
+    """
+    source_file = output_file if os.path.exists(output_file) else input_file
+    with open(source_file, 'r', encoding='utf-8') as f:
+        questions = json.load(f)
+
+    total = len(questions)
+    end_index = min(start_index + batch_size, total)
+
+    print(f"正在翻译第 {start_index + 1} 到 {end_index} 题，共 {total} 题")
+
+    for i in range(start_index, end_index):
+        q = questions[i]
+        print(f"翻译 Topic {q['topic']} - Question {q['question_num']} ({i+1}/{total})")
+
+        if _needs_translation(q, 'stem_cn'):
+            print("  翻译题干...")
+            q['stem_cn'] = translate_text(q['stem'])
+            time.sleep(0.5)  # pause between API calls
+
+        for opt in q['options']:
+            if _needs_translation(opt, 'text_cn'):
+                print(f"  翻译选项 {opt['label']}...")
+                opt['text_cn'] = translate_text(opt['text'])
+                time.sleep(0.3)  # pause between API calls
+
+        # Checkpoint after every question. Write to a sibling temp file and
+        # swap it in with os.replace so an interrupt during the dump cannot
+        # leave a truncated output file that the next run fails to load.
+        tmp_file = f"{output_file}.tmp"
+        with open(tmp_file, 'w', encoding='utf-8') as f:
+            json.dump(questions, f, ensure_ascii=False, indent=2)
+        os.replace(tmp_file, output_file)
+
+    print(f"\n翻译进度: {end_index}/{total}")
+
+    if end_index < total:
+        print(f"继续翻译请运行: python3 translate_aliyun.py --start {end_index}")
+    else:
+        print("所有题目翻译完成！")
+
+    return questions
+
+if __name__ == '__main__':
+    import argparse
+
+    # Command-line entry point: translate one batch per invocation.
+    # --start and --batch select the slice of questions; --input and --output
+    # override the data file locations and default to the original paths.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--start', type=int, default=0, help='起始索引')
+    parser.add_argument('--batch', type=int, default=10, help='批量大小')
+    parser.add_argument(
+        '--input',
+        default='/Users/duguoyou/D365/exam_data/questions.json',
+        help='输入文件路径',
+    )
+    parser.add_argument(
+        '--output',
+        default='/Users/duguoyou/D365/exam_data/questions_translated.json',
+        help='输出文件路径',
+    )
+    args = parser.parse_args()
+
+    input_file = args.input
+    output_file = args.output
+
+    translate_questions(input_file, output_file, args.start, args.batch)