#!/usr/bin/env python3
"""Translate exam question content into Chinese using the Alibaba Bailian API.

The script reads a JSON list of questions, fills in the ``stem_cn`` field of
each question and the ``text_cn`` field of each option, and writes the result
to an output file.  Work is done in batches so a run can be resumed with
``--start``.
"""

import argparse
import json
import os
import ssl
import time
import urllib.parse
import urllib.request

# NOTE(review): a credential is committed in source control.  It should be
# rotated and supplied only through the environment.  The literal is kept as
# a fallback so existing invocations keep working unchanged.
API_KEY = os.environ.get(
    "DASHSCOPE_API_KEY",
    "sk-74905419d30541d18991396892bb27b0",
)
API_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"

# Paths used when --input / --output are not given on the command line.
DEFAULT_INPUT_FILE = '/Users/duguoyou/D365/exam_data/questions.json'
DEFAULT_OUTPUT_FILE = '/Users/duguoyou/D365/exam_data/questions_translated.json'

# Placeholder value that marks a field as "not translated yet".
_PENDING_MARKER = '待翻译...'

# The prompt ends with this label, and the model sometimes echoes it back at
# the start of its answer (with an ASCII or a full-width colon).
_ANSWER_PREFIXES = ('中文翻译:', '中文翻译：')


def _extract_translation(result):
    """Return the translated string from a decoded API response, or None.

    Two response shapes are accepted: ``output.text`` and
    ``output.choices[0].message.content``.  The text is stripped of
    surrounding whitespace and of an echoed answer label.
    """
    if not isinstance(result, dict):
        return None
    output = result.get('output')
    if not isinstance(output, dict):
        return None

    content = output.get('text')
    if not isinstance(content, str):
        try:
            content = output['choices'][0]['message']['content']
        except (KeyError, IndexError, TypeError):
            return None
        if not isinstance(content, str):
            return None

    translated = content.strip()
    for prefix in _ANSWER_PREFIXES:
        if translated.startswith(prefix):
            # Slice by the real prefix length; a hard-coded 6 would also drop
            # the first character of the translation (the label is 5 chars).
            translated = translated[len(prefix):].strip()
            break
    return translated


def _needs_translation(entry, key):
    """Return True when ``entry[key]`` is missing, empty or still a placeholder."""
    value = entry.get(key)
    return not value or value == _PENDING_MARKER


def translate_text(text):
    """Translate English ``text`` to Chinese with the Alibaba Bailian API.

    Args:
        text: The English source string.

    Returns:
        The translated string.  ``text`` itself is returned unchanged when it
        is empty or whitespace-only, when the request fails, or when the
        response does not contain a translation (the problem is printed).
    """
    if not text or not text.strip():
        return text

    prompt = (
        "请将以下英文翻译成中文,保持专业术语的准确性,直接输出翻译结果,不要添加任何解释: "
        f"英文原文: {text} 中文翻译:"
    )
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {API_KEY}',
    }
    data = {
        "model": "qwen-turbo",
        "input": {
            "messages": [
                {"role": "user", "content": prompt},
            ],
        },
        "parameters": {
            "temperature": 0.1,
            "max_tokens": 2000,
        },
    }

    try:
        req = urllib.request.Request(
            API_URL,
            data=json.dumps(data).encode('utf-8'),
            headers=headers,
            method='POST',
        )
        ssl_context = ssl.create_default_context()
        with urllib.request.urlopen(req, context=ssl_context, timeout=60) as response:
            result = json.loads(response.read().decode('utf-8'))
    except Exception as e:
        # Deliberate best-effort boundary: any network, HTTP or decoding
        # failure is reported and the source text is handed back.
        print(f"翻译错误: {e}")
        return text

    translated = _extract_translation(result)
    if translated is None:
        print(f"API响应格式错误: {result}")
        return text
    return translated


def translate_questions(input_file, output_file, start_index=0, batch_size=10):
    """Translate one batch of questions and save the whole list.

    If ``output_file`` already exists it is loaded instead of ``input_file``
    so earlier batches are kept.  Only fields that are missing, empty or set
    to the placeholder are sent for translation.

    NOTE(review): when a translation fails, ``translate_text`` returns the
    source text, which is then stored as the translation and will not be
    retried by a later run.

    Args:
        input_file: Path of the JSON file holding the untranslated questions.
        output_file: Path of the JSON file to read progress from and write to.
        start_index: Zero-based index of the first question in this batch.
            Negative values are treated as 0.
        batch_size: Maximum number of questions to process in this call.

    Returns:
        The full list of question dicts, including the updated entries.
    """
    source_path = output_file if os.path.exists(output_file) else input_file
    with open(source_path, 'r', encoding='utf-8') as f:
        questions = json.load(f)

    total = len(questions)
    # A negative start would index from the end of the list; clamp it.
    start_index = max(start_index, 0)
    end_index = min(start_index + batch_size, total)

    print(f"正在翻译第 {start_index + 1} 到 {end_index} 题,共 {total} 题")

    for i in range(start_index, end_index):
        q = questions[i]
        print(f"翻译 Topic {q['topic']} - Question {q['question_num']} ({i+1}/{total})")

        if _needs_translation(q, 'stem_cn'):
            print(" 翻译题干...")
            q['stem_cn'] = translate_text(q['stem'])
            time.sleep(0.5)  # pause between API calls

        for opt in q['options']:
            if _needs_translation(opt, 'text_cn'):
                print(f" 翻译选项 {opt['label']}...")
                opt['text_cn'] = translate_text(opt['text'])
                time.sleep(0.3)  # pause between API calls

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(questions, f, ensure_ascii=False, indent=2)

    print(f"\n翻译进度: {end_index}/{total}")
    if end_index < total:
        print(f"继续翻译请运行: python3 translate_aliyun.py --start {end_index}")
    else:
        print("所有题目翻译完成!")

    return questions


def main(argv=None):
    """Parse command-line options and translate one batch of questions.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--start', type=int, default=0, help='起始索引')
    parser.add_argument('--batch', type=int, default=10, help='批量大小')
    parser.add_argument('--input', default=DEFAULT_INPUT_FILE, help='输入文件路径')
    parser.add_argument('--output', default=DEFAULT_OUTPUT_FILE, help='输出文件路径')
    args = parser.parse_args(argv)

    translate_questions(args.input, args.output, args.start, args.batch)


if __name__ == '__main__':
    main()