d365scm/translate_api.py

#!/usr/bin/env python3
"""
翻译题目内容 - 使用翻译API
"""
import json
import os
import time
import urllib.request
import urllib.parse
import ssl

# Shared TLS context handed to the urlopen() call in translate_text().
# NOTE(review): hostname checking and certificate verification are both
# switched off below, so the server's identity is never validated and
# responses could be tampered with in transit. Presumably a workaround for a
# local certificate-store problem -- TODO confirm it is still required.
# Keep the statement order: check_hostname has to be disabled before
# verify_mode is set to CERT_NONE, otherwise the ssl module raises ValueError.
ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE

def translate_text(text, target_lang='zh-CN', source_lang='en'):
    """
    Translate ``text`` with the Google Translate ``translate_a/single`` endpoint.

    Args:
        text: The string to translate. Empty, whitespace-only, ``None`` or
            non-string values are returned unchanged without any request.
        target_lang: Language code sent as the ``tl`` query parameter.
        source_lang: Language code sent as the ``sl`` query parameter.
            Defaults to ``'en'``, the value that used to be hard-coded.

    Returns:
        The translated string, built by concatenating the first element of
        every segment in the first list of the JSON response. If the request
        or the parsing fails, or the response contains no segment list, the
        original ``text`` is returned instead (best-effort behaviour).
    """
    # Nothing to translate; also avoids AttributeError on non-string values.
    if not isinstance(text, str) or not text.strip():
        return text

    base_url = "https://translate.googleapis.com/translate_a/single"
    params = {
        'client': 'gtx',
        'sl': source_lang,
        'tl': target_lang,
        'dt': 't',
        'q': text
    }
    url = base_url + '?' + urllib.parse.urlencode(params)

    req = urllib.request.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0')

    try:
        with urllib.request.urlopen(req, context=ssl_context, timeout=30) as response:
            payload = response.read().decode('utf-8')
        result = json.loads(payload)

        if result and result[0]:
            # Each segment is a list whose first element is the translated
            # chunk; skip empty/None segments instead of failing on them.
            return ''.join(item[0] for item in result[0] if item and item[0])
    except Exception as e:
        # Deliberately broad: translation is best-effort and callers rely on
        # getting the original text back rather than an exception.
        print(f"翻译错误: {e}")
        return text

    return text

def _save_questions(questions, output_file):
    """Write ``questions`` as UTF-8 JSON to ``output_file`` via a temp file and rename."""
    tmp_file = f"{output_file}.tmp"
    with open(tmp_file, 'w', encoding='utf-8') as f:
        json.dump(questions, f, ensure_ascii=False, indent=2)
    # Replace in one step so an interrupted write cannot truncate an existing
    # output file (which may be the very file the questions were loaded from).
    os.replace(tmp_file, output_file)


def translate_questions(input_file, output_file, start_index=0, batch_size=50):
    """
    Translate one batch of questions and save the full question list.

    Args:
        input_file: Path of the JSON file holding a list of question dicts.
            Each question may carry ``topic``, ``question_num``, ``stem``,
            ``stem_cn`` and ``options`` (a list of dicts with ``text`` and
            ``text_cn``).
        output_file: Path the complete list (translated or not) is written to.
        start_index: Zero-based index of the first question to process.
            Values outside ``0..len(questions)`` are clamped into that range.
        batch_size: Maximum number of questions to process in this call;
            negative values are treated as 0.

    Returns:
        The list of question dicts, updated in place with ``stem_cn`` and
        ``text_cn`` for every processed entry that lacked a non-empty value.

    Notes:
        The output file is written even when the batch is interrupted by an
        exception or Ctrl-C, so translations already fetched are not lost.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        questions = json.load(f)

    total = len(questions)
    # Clamp so a negative start cannot index from the end of the list.
    start_index = min(max(start_index, 0), total)
    end_index = min(start_index + max(batch_size, 0), total)

    print(f"正在翻译第 {start_index + 1} 到 {end_index} 题，共 {total} 题")

    try:
        for i in range(start_index, end_index):
            q = questions[i]
            print(f"翻译 Topic {q.get('topic', '?')} - Question {q.get('question_num', '?')} ({i+1}/{total})")

            # Only translate fields that are missing or empty, so a re-run
            # over the same range does not repeat finished work.
            if not q.get('stem_cn'):
                q['stem_cn'] = translate_text(q.get('stem', ''))
                time.sleep(0.5)  # pause between requests

            for opt in q.get('options') or []:
                if not opt.get('text_cn'):
                    opt['text_cn'] = translate_text(opt.get('text', ''))
                    time.sleep(0.3)  # pause between requests
    finally:
        # Persist whatever was translated, even on failure or interruption.
        _save_questions(questions, output_file)

    print(f"翻译进度: {end_index}/{total}")

    if end_index < total:
        print(f"继续翻译请运行: python3 translate_api.py --start {end_index}")

    return questions

if __name__ == '__main__':
    # Command-line entry point: translate one batch per invocation.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--start', type=int, default=0, help='起始索引')
    parser.add_argument('--batch', type=int, default=50, help='批量大小')
    # The paths used to be hard-coded; the defaults keep the previous values.
    parser.add_argument('--input',
                        default='/Users/duguoyou/D365/exam_data/questions.json',
                        help='原始题目JSON文件路径')
    parser.add_argument('--output',
                        default='/Users/duguoyou/D365/exam_data/questions_translated.json',
                        help='翻译结果JSON文件路径')
    args = parser.parse_args()

    input_file = args.input
    output_file = args.output

    # Resume: when a translated file already exists, continue from it so that
    # earlier batches are kept instead of starting again from the source file.
    if os.path.exists(output_file):
        input_file = output_file

    if not os.path.exists(input_file):
        parser.error(f"找不到输入文件: {input_file}")

    translate_questions(input_file, output_file, args.start, args.batch)