first commit
This commit is contained in:
130
translate_aliyun.py
Normal file
130
translate_aliyun.py
Normal file
@@ -0,0 +1,130 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
使用阿里百炼API翻译题目内容
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import ssl
|
||||
|
||||
# The DashScope API key is a secret and must not live in source control.
# It is read from the environment; an empty string is used when the variable
# is unset, in which case requests will be rejected by the service.
# NOTE(review): any key that was previously committed in this file should be
# treated as leaked and revoked in the Alibaba Cloud console.
API_KEY = os.environ.get("DASHSCOPE_API_KEY", "")

# HTTP endpoint used for text-generation requests.
API_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"
|
||||
|
||||
def translate_text(text):
    """Translate English text into Chinese through the text-generation API.

    A single-message chat request (model ``qwen-turbo``, temperature 0.1,
    at most 2000 tokens) is POSTed to ``API_URL`` with a 60 second timeout.

    Args:
        text: The English source text.

    Returns:
        The stripped translation taken from ``output.text`` of the JSON
        response, with an echoed "中文翻译:" label removed if present.
        ``text`` itself is returned unchanged when it is empty or
        whitespace-only, when the response has no ``output.text`` field, or
        when anything raises while sending the request or reading the reply
        (the error is printed, not propagated).
    """
    if not text or not text.strip():
        return text

    prompt = f"""请将以下英文翻译成中文,保持专业术语的准确性,直接输出翻译结果,不要添加任何解释:

英文原文:
{text}

中文翻译:"""

    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {API_KEY}',
    }
    payload = {
        "model": "qwen-turbo",
        "input": {
            "messages": [
                {"role": "user", "content": prompt},
            ],
        },
        "parameters": {
            "temperature": 0.1,
            "max_tokens": 2000,
        },
    }

    # Best-effort by design: any failure falls back to the original text so
    # that a batch run is never aborted by a single bad request.
    try:
        req = urllib.request.Request(
            API_URL,
            data=json.dumps(payload).encode('utf-8'),
            headers=headers,
            method='POST',
        )
        ssl_context = ssl.create_default_context()

        with urllib.request.urlopen(req, context=ssl_context, timeout=60) as response:
            result = json.loads(response.read().decode('utf-8'))

        if result and 'output' in result and 'text' in result['output']:
            translated = result['output']['text'].strip()
            # The model sometimes echoes the trailing prompt label. Strip it
            # by its real length (the old fixed slice of 6 cut one character
            # too many); accept both the ASCII and the full-width colon.
            for label in ('中文翻译:', '中文翻译:'):
                if translated.startswith(label):
                    translated = translated[len(label):].strip()
                    break
            return translated

        print(f"API响应格式错误: {result}")
        return text

    except Exception as e:
        print(f"翻译错误: {e}")
        return text
|
||||
|
||||
def _needs_translation(value):
    """Return True when a translated field is missing, empty, or a placeholder."""
    return not value or value == '待翻译...'


def translate_questions(input_file, output_file, start_index=0, batch_size=10):
    """Translate one batch of questions and save the full list as JSON.

    The question list is loaded from ``output_file`` when that file already
    exists (so earlier batches are kept), otherwise from ``input_file``.
    For every question in ``[start_index, start_index + batch_size)`` the
    stem and each option are translated with ``translate_text`` unless a
    non-empty, non-placeholder ``stem_cn`` / ``text_cn`` is already present.

    Args:
        input_file: Path of the source JSON file (a list of question dicts).
        output_file: Path the translated list is written to.
        start_index: Zero-based index of the first question to process.
        batch_size: Maximum number of questions to process in this call.

    Returns:
        The complete question list, including entries outside the batch.

    Raises:
        ValueError: If ``start_index`` or ``batch_size`` is negative. A
            negative start would otherwise wrap around through negative
            list indexing and process the wrong questions.
    """
    if start_index < 0 or batch_size < 0:
        raise ValueError(
            f"start_index 和 batch_size 不能为负数: "
            f"start_index={start_index}, batch_size={batch_size}"
        )

    source_file = output_file if os.path.exists(output_file) else input_file
    with open(source_file, 'r', encoding='utf-8') as f:
        questions = json.load(f)

    total = len(questions)
    end_index = min(start_index + batch_size, total)

    print(f"正在翻译第 {start_index + 1} 到 {end_index} 题,共 {total} 题")

    try:
        for i in range(start_index, end_index):
            q = questions[i]
            print(f"翻译 Topic {q['topic']} - Question {q['question_num']} ({i+1}/{total})")

            if _needs_translation(q.get('stem_cn')):
                print(" 翻译题干...")
                q['stem_cn'] = translate_text(q['stem'])
                time.sleep(0.5)  # brief pause between API calls

            for opt in q['options']:
                if _needs_translation(opt.get('text_cn')):
                    print(f" 翻译选项 {opt['label']}...")
                    opt['text_cn'] = translate_text(opt['text'])
                    time.sleep(0.3)  # brief pause between API calls
    finally:
        # Persist whatever has been translated so far, even when the batch is
        # interrupted (Ctrl-C, malformed entry), so finished work is not lost.
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(questions, f, ensure_ascii=False, indent=2)

    print(f"\n翻译进度: {end_index}/{total}")

    if end_index < total:
        print(f"继续翻译请运行: python3 translate_aliyun.py --start {end_index}")
    else:
        print("所有题目翻译完成!")

    return questions
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: translate one batch of questions per run.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--start', type=int, default=0, help='起始索引')
    parser.add_argument('--batch', type=int, default=10, help='批量大小')
    # The file locations used to be fixed to one machine's absolute paths;
    # they remain the defaults so existing invocations behave the same.
    parser.add_argument(
        '--input',
        default='/Users/duguoyou/D365/exam_data/questions.json',
        help='输入文件路径',
    )
    parser.add_argument(
        '--output',
        default='/Users/duguoyou/D365/exam_data/questions_translated.json',
        help='输出文件路径',
    )
    args = parser.parse_args()

    input_file = args.input
    output_file = args.output

    translate_questions(input_file, output_file, args.start, args.batch)
|
||||
Reference in New Issue
Block a user