Files
d365scm/translate_api.py
2026-03-21 09:12:47 +08:00

100 lines
3.0 KiB
Python

#!/usr/bin/env python3
"""
翻译题目内容 - 使用翻译API
"""
import json
import os
import time
import urllib.request
import urllib.parse
import ssl
# TLS context shared by every request this module makes.
# Certificate and hostname verification stay enabled by default so that
# responses cannot be tampered with in transit. Setting the environment
# variable TRANSLATE_INSECURE_SSL=1 restores the previous unverified
# behaviour for machines whose trust store is broken; use it only as a
# last resort.
ssl_context = ssl.create_default_context()
if os.environ.get('TRANSLATE_INSECURE_SSL') == '1':
    # check_hostname has to be cleared before verify_mode may be CERT_NONE.
    ssl_context.check_hostname = False
    ssl_context.verify_mode = ssl.CERT_NONE
def translate_text(text, target_lang='zh-CN', source_lang='en'):
    """Translate ``text`` through the Google Translate web endpoint.

    The request is a GET to ``translate_a/single`` using the ``gtx`` client
    (no API key is sent). Translation is best-effort: when anything raises
    an ``Exception`` the error is printed and the input is returned
    unchanged, so callers do not have to handle request failures.

    Args:
        text: Text to translate. Non-string, empty or whitespace-only
            values are returned as-is without making a request.
        target_lang: Language code to translate into (default ``'zh-CN'``).
        source_lang: Language code of ``text`` (default ``'en'``).

    Returns:
        The translated string, or ``text`` itself when there was nothing to
        translate, the request failed, or the response held no translation.
    """
    if not isinstance(text, str) or not text.strip():
        return text

    try:
        query = urllib.parse.urlencode({
            'client': 'gtx',
            'sl': source_lang,
            'tl': target_lang,
            'dt': 't',
            'q': text,
        })
        req = urllib.request.Request(
            'https://translate.googleapis.com/translate_a/single?' + query
        )
        req.add_header('User-Agent', 'Mozilla/5.0')

        with urllib.request.urlopen(req, context=ssl_context, timeout=30) as response:
            result = json.loads(response.read().decode('utf-8'))

        # result[0] is read as a list of segments whose first element is the
        # translated piece; the pieces are concatenated in order.
        if result and result[0]:
            return ''.join(seg[0] for seg in result[0] if seg and seg[0])
    except Exception as e:
        # Deliberately broad: a failed translation must not abort the batch.
        print(f"翻译错误: {e}")

    return text
def _save_questions(questions, output_file):
    """Write ``questions`` to ``output_file`` as UTF-8 JSON via a temp file."""
    tmp_file = f"{output_file}.tmp"
    with open(tmp_file, 'w', encoding='utf-8') as f:
        json.dump(questions, f, ensure_ascii=False, indent=2)
    # Swap the finished file into place so an interrupted write can never
    # truncate output_file, which may be the same file that was just read.
    os.replace(tmp_file, output_file)


def translate_questions(input_file, output_file, start_index=0, batch_size=50):
    """Translate one batch of questions and save the whole list.

    ``input_file`` must contain a JSON list of question dicts. Each question
    is read through the keys ``topic``, ``question_num``, ``stem`` and
    ``options`` (a list of dicts with a ``text`` key). Missing or empty
    ``stem_cn`` / ``text_cn`` fields are filled in by ``translate_text``;
    fields that already hold a value are left alone, which makes re-runs
    resume where they stopped.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path the full (partially translated) list is written to.
        start_index: Zero-based index of the first question to process.
            Negative values are treated as 0.
        batch_size: Maximum number of questions to process in this call.

    Returns:
        The full list of question dicts, including untouched ones.

    Raises:
        Any error from reading the input or from malformed question dicts.
        Translations finished before the error are still written out.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        questions = json.load(f)

    total = len(questions)
    # A negative start would index from the end of the list; clamp it.
    start_index = max(start_index, 0)
    end_index = min(start_index + batch_size, total)
    print(f"正在翻译第 {start_index + 1}-{end_index} 题,共 {total}")

    try:
        for i in range(start_index, end_index):
            q = questions[i]
            print(f"翻译 Topic {q['topic']} - Question {q['question_num']} ({i+1}/{total})")

            # NOTE: translate_text returns its input when a request fails, so
            # a failed call stores the untranslated text and the field is
            # skipped on later runs.
            if not q.get('stem_cn'):
                q['stem_cn'] = translate_text(q['stem'])
                time.sleep(0.5)  # pause between requests

            for opt in q['options']:
                if not opt.get('text_cn'):
                    opt['text_cn'] = translate_text(opt['text'])
                    time.sleep(0.3)  # pause between requests
    finally:
        # Save even when interrupted so finished translations are not lost.
        _save_questions(questions, output_file)

    print(f"翻译进度: {end_index}/{total}")
    if end_index < total:
        print(f"继续翻译请运行: python3 translate_api.py --start {end_index}")
    return questions
def main(argv=None):
    """Command-line entry point: translate one batch of questions.

    When the output file already exists it is used as the input, so each
    run continues from the translations saved by the previous one.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--start', type=int, default=0, help='起始索引')
    parser.add_argument('--batch', type=int, default=50, help='批量大小')
    # The defaults keep the paths the script originally hard-coded.
    parser.add_argument(
        '--input',
        default='/Users/duguoyou/D365/exam_data/questions.json',
        help='原始题目 JSON 文件路径',
    )
    parser.add_argument(
        '--output',
        default='/Users/duguoyou/D365/exam_data/questions_translated.json',
        help='翻译结果 JSON 文件路径',
    )
    args = parser.parse_args(argv)

    input_file = args.input
    output_file = args.output
    # Resume from earlier output so finished translations are kept.
    if os.path.exists(output_file):
        input_file = output_file

    translate_questions(input_file, output_file, args.start, args.batch)


if __name__ == '__main__':
    main()