first commit
This commit is contained in:
470844
MB-330_with_discussion.pdf
Normal file
470844
MB-330_with_discussion.pdf
Normal file
File diff suppressed because one or more lines are too long
73
analyze_pdf.py
Normal file
73
analyze_pdf.py
Normal file
@@ -0,0 +1,73 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
分析PDF文件结构,识别Topic位置
|
||||
"""
|
||||
import re
|
||||
from PyPDF2 import PdfReader
|
||||
|
||||
def analyze_pdf_structure(pdf_path):
    """Scan a PDF and locate every "Topic N" section and its question count.

    Args:
        pdf_path: Path to the PDF file to analyze.

    Returns:
        A tuple ``(topics, total_pages)`` where ``topics`` maps each topic
        number to a dict with ``title``, ``start_page``, ``end_page``
        (both 0-based and inclusive) and ``question_count``, and
        ``total_pages`` is the page count of the PDF.
    """
    reader = PdfReader(pdf_path)
    total_pages = len(reader.pages)
    print(f"PDF总页数: {total_pages}")

    topic_pattern = re.compile(r'^Topic\s+(\d+)', re.IGNORECASE)
    question_pattern = re.compile(r'^Question\s+(\d+)', re.IGNORECASE)

    topics = {}
    current_topic = None
    question_count = 0
    total_questions = 0

    for page_num in range(total_pages):
        page = reader.pages[page_num]
        text = page.extract_text()

        if text:
            lines = text.split('\n')
            for line in lines:
                line = line.strip()

                topic_match = topic_pattern.match(line)
                if topic_match:
                    if current_topic is not None:
                        # Close the previous topic on the page *before* the
                        # one carrying the next "Topic N" header.  Using
                        # page_num here (as before) double-counted the
                        # boundary page and was inconsistent with the
                        # inclusive total_pages - 1 used for the last topic.
                        topics[current_topic]['end_page'] = page_num - 1
                        topics[current_topic]['question_count'] = question_count
                        total_questions += question_count

                    topic_num = int(topic_match.group(1))
                    current_topic = topic_num
                    topics[topic_num] = {
                        'title': line,
                        'start_page': page_num,
                        'end_page': None,
                        'question_count': 0
                    }
                    question_count = 0
                    print(f"发现 Topic {topic_num}: 第 {page_num + 1} 页 - {line}")

                question_match = question_pattern.match(line)
                if question_match and current_topic is not None:
                    q_num = int(question_match.group(1))
                    # Question numbers restart inside each topic; the highest
                    # number seen is that topic's question count.
                    if q_num > question_count:
                        question_count = q_num

    # Close out the final topic, which runs to the last page (inclusive).
    if current_topic is not None:
        topics[current_topic]['end_page'] = total_pages - 1
        topics[current_topic]['question_count'] = question_count
        total_questions += question_count

    print(f"\n共发现 {len(topics)} 个Topic")
    print(f"总题目数: {total_questions}")
    print("\n各Topic统计:")
    for topic_num in sorted(topics.keys()):
        info = topics[topic_num]
        print(f" Topic {topic_num}: 第 {info['start_page']+1}-{info['end_page']+1} 页, {info['question_count']} 道题")

    return topics, total_pages
|
||||
|
||||
if __name__ == '__main__':
    # Analyze the MB-330 exam dump PDF and print the per-topic breakdown.
    source_pdf = '/Users/duguoyou/D365/MB-330_with_discussion.pdf'
    topics, total_pages = analyze_pdf_structure(source_pdf)
|
||||
91
analyze_pdf_v2.py
Normal file
91
analyze_pdf_v2.py
Normal file
@@ -0,0 +1,91 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
分析PDF文件结构,识别Topic位置和题目数量
|
||||
"""
|
||||
import re
|
||||
import json
|
||||
from PyPDF2 import PdfReader
|
||||
|
||||
def analyze_pdf_structure(pdf_path):
    """Scan a PDF for "Topic N" sections and the question numbers in each.

    Args:
        pdf_path: Path to the PDF file to analyze.

    Returns:
        A list of dicts (one per topic, ascending topic number) with
        ``topic_num``, ``start_page``, ``end_page`` (0-based, inclusive),
        ``question_count`` and the sorted ``questions`` numbers.  The same
        data is also written to ``topics_info.json``.
    """
    reader = PdfReader(pdf_path)
    total_pages = len(reader.pages)
    print(f"PDF总页数: {total_pages}")

    topic_pattern = re.compile(r'^Topic\s+(\d+)$', re.IGNORECASE)
    question_pattern = re.compile(r'Question\s+#(\d+)', re.IGNORECASE)

    topics = {}
    current_topic = None
    current_topic_start = None
    all_questions = []

    for page_num in range(total_pages):
        page = reader.pages[page_num]
        text = page.extract_text()

        if text:
            lines = text.split('\n')
            for line in lines:
                line = line.strip()

                topic_match = topic_pattern.match(line)
                if topic_match:
                    if current_topic is not None:
                        topics[current_topic] = {
                            'topic_num': current_topic,
                            'start_page': current_topic_start,
                            'end_page': page_num - 1,
                            'questions': all_questions.copy()
                        }
                    # Reset the accumulator on every new header.  Previously
                    # the reset only happened when closing an earlier topic,
                    # so questions matched before the first "Topic" header
                    # leaked into the first topic's list.
                    all_questions = []

                    current_topic = int(topic_match.group(1))
                    current_topic_start = page_num
                    print(f"发现 Topic {current_topic}: 第 {page_num + 1} 页")

                question_match = question_pattern.search(line)
                # Only attribute a question once a topic is active; matches
                # in front-matter pages have no owner and are discarded.
                if question_match and current_topic is not None:
                    q_num = int(question_match.group(1))
                    if q_num not in all_questions:
                        all_questions.append(q_num)

    # Close out the final topic, which runs to the last page (inclusive).
    if current_topic is not None:
        topics[current_topic] = {
            'topic_num': current_topic,
            'start_page': current_topic_start,
            'end_page': total_pages - 1,
            'questions': all_questions
        }

    print(f"\n共发现 {len(topics)} 个Topic")

    result = []
    total_questions = 0
    print("\n各Topic统计:")
    for topic_num in sorted(topics.keys()):
        info = topics[topic_num]
        q_count = len(info['questions'])
        total_questions += q_count
        print(f" Topic {topic_num}: 第 {info['start_page']+1}-{info['end_page']+1} 页, {q_count} 道题")
        result.append({
            'topic_num': topic_num,
            'start_page': info['start_page'],
            'end_page': info['end_page'],
            'question_count': q_count,
            'questions': sorted(info['questions'])
        })

    print(f"\n总题目数: {total_questions}")

    with open('/Users/duguoyou/D365/topics_info.json', 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    print("\nTopic信息已保存到 topics_info.json")

    return result
|
||||
|
||||
if __name__ == '__main__':
    # Analyze the MB-330 exam dump PDF and dump per-topic question info.
    source_pdf = '/Users/duguoyou/D365/MB-330_with_discussion.pdf'
    topics = analyze_pdf_structure(source_pdf)
|
||||
82
analyze_pdf_v3.py
Normal file
82
analyze_pdf_v3.py
Normal file
@@ -0,0 +1,82 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
分析PDF文件结构,识别Topic位置和题目数量 - 改进版
|
||||
"""
|
||||
import re
|
||||
import json
|
||||
from PyPDF2 import PdfReader
|
||||
|
||||
def analyze_pdf_structure(pdf_path):
    """Scan a PDF for "Topic N" headers and attribute questions by page.

    Topics are located by the pages their headers appear on; each
    "Question #N" is attributed to the topic whose section contains the
    page it was found on.

    Args:
        pdf_path: Path to the PDF file to analyze.

    Returns:
        A list of dicts (one per topic, ascending topic number) with
        ``topic_num``, ``start_page``, ``end_page`` (0-based, inclusive),
        ``question_count`` and the sorted ``questions`` numbers.  The same
        data is also written to ``topics_info.json``.
    """
    reader = PdfReader(pdf_path)
    total_pages = len(reader.pages)
    print(f"PDF总页数: {total_pages}")

    topic_header_pattern = re.compile(r'^Topic\s+(\d+)$', re.IGNORECASE)
    question_pattern = re.compile(r'Question\s+#(\d+)', re.IGNORECASE)

    # topic_num -> {'first_page', 'last_page'}; the two dicts below are
    # always populated together when a header is first seen.
    topic_pages = {}
    all_questions_in_topic = {}

    for page_num in range(total_pages):
        page = reader.pages[page_num]
        text = page.extract_text()

        if text:
            lines = text.split('\n')
            for line in lines:
                line = line.strip()

                topic_match = topic_header_pattern.match(line)
                if topic_match:
                    topic_num = int(topic_match.group(1))
                    if topic_num not in topic_pages:
                        topic_pages[topic_num] = {'first_page': page_num, 'last_page': page_num}
                        all_questions_in_topic[topic_num] = []
                    else:
                        topic_pages[topic_num]['last_page'] = page_num

                question_match = question_pattern.search(line)
                if question_match:
                    q_num = int(question_match.group(1))
                    # Attribute the question to the topic whose section
                    # starts latest at or before this page.  Selecting by
                    # first_page (not by topic number, as before) stays
                    # correct even if topic numbers are not in page order.
                    candidates = [t for t, pages in topic_pages.items()
                                  if pages['first_page'] <= page_num]
                    if candidates:
                        owner = max(candidates,
                                    key=lambda t: topic_pages[t]['first_page'])
                        if q_num not in all_questions_in_topic[owner]:
                            all_questions_in_topic[owner].append(q_num)

    print(f"\n共发现 {len(topic_pages)} 个Topic")

    result = []
    total_questions = 0
    print("\n各Topic统计:")
    for topic_num in sorted(topic_pages.keys()):
        questions = sorted(all_questions_in_topic.get(topic_num, []))
        q_count = len(questions)
        total_questions += q_count
        first_page = topic_pages[topic_num]['first_page']
        last_page = topic_pages[topic_num]['last_page']
        print(f" Topic {topic_num}: 第 {first_page+1}-{last_page+1} 页, {q_count} 道题 (题目编号: {questions[:5]}{'...' if len(questions) > 5 else ''})")
        result.append({
            'topic_num': topic_num,
            'start_page': first_page,
            'end_page': last_page,
            'question_count': q_count,
            'questions': questions
        })

    print(f"\n总题目数: {total_questions}")

    with open('/Users/duguoyou/D365/topics_info.json', 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    print("\nTopic信息已保存到 topics_info.json")

    return result
|
||||
|
||||
if __name__ == '__main__':
    # Analyze the MB-330 exam dump PDF with page-based topic attribution.
    source_pdf = '/Users/duguoyou/D365/MB-330_with_discussion.pdf'
    topics = analyze_pdf_structure(source_pdf)
|
||||
24
exam-viewer/.gitignore
vendored
Normal file
24
exam-viewer/.gitignore
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
pnpm-debug.log*
|
||||
lerna-debug.log*
|
||||
|
||||
node_modules
|
||||
dist
|
||||
dist-ssr
|
||||
*.local
|
||||
|
||||
# Editor directories and files
|
||||
.vscode/*
|
||||
!.vscode/extensions.json
|
||||
.idea
|
||||
.DS_Store
|
||||
*.suo
|
||||
*.ntvs*
|
||||
*.njsproj
|
||||
*.sln
|
||||
*.sw?
|
||||
3
exam-viewer/.vscode/extensions.json
vendored
Normal file
3
exam-viewer/.vscode/extensions.json
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"recommendations": ["Vue.volar"]
|
||||
}
|
||||
5
exam-viewer/README.md
Normal file
5
exam-viewer/README.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# Vue 3 + TypeScript + Vite
|
||||
|
||||
This template should help get you started developing with Vue 3 and TypeScript in Vite. The template uses Vue 3 `<script setup>` SFCs, check out the [script setup docs](https://v3.vuejs.org/api/sfc-script-setup.html#sfc-script-setup) to learn more.
|
||||
|
||||
Learn more about the recommended Project Setup and IDE Support in the [Vue Docs TypeScript Guide](https://vuejs.org/guide/typescript/overview.html#project-setup).
|
||||
13
exam-viewer/index.html
Normal file
13
exam-viewer/index.html
Normal file
@@ -0,0 +1,13 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<link rel="icon" type="image/svg+xml" href="/favicon.svg" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>exam-viewer</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="app"></div>
|
||||
<script type="module" src="/src/main.ts"></script>
|
||||
</body>
|
||||
</html>
|
||||
1570
exam-viewer/package-lock.json
generated
Normal file
1570
exam-viewer/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
25
exam-viewer/package.json
Normal file
25
exam-viewer/package.json
Normal file
@@ -0,0 +1,25 @@
|
||||
{
|
||||
"name": "exam-viewer",
|
||||
"private": true,
|
||||
"version": "0.0.0",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "vue-tsc -b && vite build",
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
"@element-plus/icons-vue": "^2.3.2",
|
||||
"element-plus": "^2.13.6",
|
||||
"pinia": "^3.0.4",
|
||||
"vue": "^3.5.30"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^24.12.0",
|
||||
"@vitejs/plugin-vue": "^6.0.5",
|
||||
"@vue/tsconfig": "^0.9.0",
|
||||
"typescript": "~5.9.3",
|
||||
"vite": "^8.0.1",
|
||||
"vue-tsc": "^3.2.5"
|
||||
}
|
||||
}
|
||||
1
exam-viewer/public/favicon.svg
Normal file
1
exam-viewer/public/favicon.svg
Normal file
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 9.3 KiB |
24
exam-viewer/public/icons.svg
Normal file
24
exam-viewer/public/icons.svg
Normal file
@@ -0,0 +1,24 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg">
|
||||
<symbol id="bluesky-icon" viewBox="0 0 16 17">
|
||||
<g clip-path="url(#bluesky-clip)"><path fill="#08060d" d="M7.75 7.735c-.693-1.348-2.58-3.86-4.334-5.097-1.68-1.187-2.32-.981-2.74-.79C.188 2.065.1 2.812.1 3.251s.241 3.602.398 4.13c.52 1.744 2.367 2.333 4.07 2.145-2.495.37-4.71 1.278-1.805 4.512 3.196 3.309 4.38-.71 4.987-2.746.608 2.036 1.307 5.91 4.93 2.746 2.72-2.746.747-4.143-1.747-4.512 1.702.189 3.55-.4 4.07-2.145.156-.528.397-3.691.397-4.13s-.088-1.186-.575-1.406c-.42-.19-1.06-.395-2.741.79-1.755 1.24-3.64 3.752-4.334 5.099"/></g>
|
||||
<defs><clipPath id="bluesky-clip"><path fill="#fff" d="M.1.85h15.3v15.3H.1z"/></clipPath></defs>
|
||||
</symbol>
|
||||
<symbol id="discord-icon" viewBox="0 0 20 19">
|
||||
<path fill="#08060d" d="M16.224 3.768a14.5 14.5 0 0 0-3.67-1.153c-.158.286-.343.67-.47.976a13.5 13.5 0 0 0-4.067 0c-.128-.306-.317-.69-.476-.976A14.4 14.4 0 0 0 3.868 3.77C1.546 7.28.916 10.703 1.231 14.077a14.7 14.7 0 0 0 4.5 2.306q.545-.748.965-1.587a9.5 9.5 0 0 1-1.518-.74q.191-.14.372-.293c2.927 1.369 6.107 1.369 8.999 0q.183.152.372.294-.723.437-1.52.74.418.838.963 1.588a14.6 14.6 0 0 0 4.504-2.308c.37-3.911-.63-7.302-2.644-10.309m-9.13 8.234c-.878 0-1.599-.82-1.599-1.82 0-.998.705-1.82 1.6-1.82.894 0 1.614.82 1.599 1.82.001 1-.705 1.82-1.6 1.82m5.91 0c-.878 0-1.599-.82-1.599-1.82 0-.998.705-1.82 1.6-1.82.893 0 1.614.82 1.599 1.82 0 1-.706 1.82-1.6 1.82"/>
|
||||
</symbol>
|
||||
<symbol id="documentation-icon" viewBox="0 0 21 20">
|
||||
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="m15.5 13.333 1.533 1.322c.645.555.967.833.967 1.178s-.322.623-.967 1.179L15.5 18.333m-3.333-5-1.534 1.322c-.644.555-.966.833-.966 1.178s.322.623.966 1.179l1.534 1.321"/>
|
||||
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M17.167 10.836v-4.32c0-1.41 0-2.117-.224-2.68-.359-.906-1.118-1.621-2.08-1.96-.599-.21-1.349-.21-2.848-.21-2.623 0-3.935 0-4.983.369-1.684.591-3.013 1.842-3.641 3.428C3 6.449 3 7.684 3 10.154v2.122c0 2.558 0 3.838.706 4.726q.306.383.713.671c.76.536 1.79.64 3.581.66"/>
|
||||
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M3 10a2.78 2.78 0 0 1 2.778-2.778c.555 0 1.209.097 1.748-.047.48-.129.854-.503.982-.982.145-.54.048-1.194.048-1.749a2.78 2.78 0 0 1 2.777-2.777"/>
|
||||
</symbol>
|
||||
<symbol id="github-icon" viewBox="0 0 19 19">
|
||||
<path fill="#08060d" fill-rule="evenodd" d="M9.356 1.85C5.05 1.85 1.57 5.356 1.57 9.694a7.84 7.84 0 0 0 5.324 7.44c.387.079.528-.168.528-.376 0-.182-.013-.805-.013-1.454-2.165.467-2.616-.935-2.616-.935-.349-.91-.864-1.143-.864-1.143-.71-.48.051-.48.051-.48.787.051 1.2.805 1.2.805.695 1.194 1.817.857 2.268.649.064-.507.27-.857.49-1.052-1.728-.182-3.545-.857-3.545-3.87 0-.857.31-1.558.8-2.104-.078-.195-.349-1 .077-2.078 0 0 .657-.208 2.14.805a7.5 7.5 0 0 1 1.946-.26c.657 0 1.328.092 1.946.26 1.483-1.013 2.14-.805 2.14-.805.426 1.078.155 1.883.078 2.078.502.546.799 1.247.799 2.104 0 3.013-1.818 3.675-3.558 3.87.284.247.528.714.528 1.454 0 1.052-.012 1.896-.012 2.156 0 .208.142.455.528.377a7.84 7.84 0 0 0 5.324-7.441c.013-4.338-3.48-7.844-7.773-7.844" clip-rule="evenodd"/>
|
||||
</symbol>
|
||||
<symbol id="social-icon" viewBox="0 0 20 20">
|
||||
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M12.5 6.667a4.167 4.167 0 1 0-8.334 0 4.167 4.167 0 0 0 8.334 0"/>
|
||||
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M2.5 16.667a5.833 5.833 0 0 1 8.75-5.053m3.837.474.513 1.035c.07.144.257.282.414.309l.93.155c.596.1.736.536.307.965l-.723.73a.64.64 0 0 0-.152.531l.207.903c.164.715-.213.991-.84.618l-.872-.52a.63.63 0 0 0-.577 0l-.872.52c-.624.373-1.003.094-.84-.618l.207-.903a.64.64 0 0 0-.152-.532l-.723-.729c-.426-.43-.289-.864.306-.964l.93-.156a.64.64 0 0 0 .412-.31l.513-1.034c.28-.562.735-.562 1.012 0"/>
|
||||
</symbol>
|
||||
<symbol id="x-icon" viewBox="0 0 19 19">
|
||||
<path fill="#08060d" fill-rule="evenodd" d="M1.893 1.98c.052.072 1.245 1.769 2.653 3.77l2.892 4.114c.183.261.333.48.333.486s-.068.089-.152.183l-.522.593-.765.867-3.597 4.087c-.375.426-.734.834-.798.905a1 1 0 0 0-.118.148c0 .01.236.017.664.017h.663l.729-.83c.4-.457.796-.906.879-.999a692 692 0 0 0 1.794-2.038c.034-.037.301-.34.594-.675l.551-.624.345-.392a7 7 0 0 1 .34-.374c.006 0 .93 1.306 2.052 2.903l2.084 2.965.045.063h2.275c1.87 0 2.273-.003 2.266-.021-.008-.02-1.098-1.572-3.894-5.547-2.013-2.862-2.28-3.246-2.273-3.266.008-.019.282-.332 2.085-2.38l2-2.274 1.567-1.782c.022-.028-.016-.03-.65-.03h-.674l-.3.342a871 871 0 0 1-1.782 2.025c-.067.075-.405.458-.75.852a100 100 0 0 1-.803.91c-.148.172-.299.344-.99 1.127-.304.343-.32.358-.345.327-.015-.019-.904-1.282-1.976-2.808L6.365 1.85H1.8zm1.782.91 8.078 11.294c.772 1.08 1.413 1.973 1.425 1.984.016.017.241.02 1.05.017l1.03-.004-2.694-3.766L7.796 5.75 5.722 2.852l-1.039-.004-1.039-.004z" clip-rule="evenodd"/>
|
||||
</symbol>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 4.9 KiB |
BIN
exam-viewer/public/pdfs/topic_01.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_01.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_02.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_02.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_03.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_03.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_04.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_04.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_05.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_05.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_06.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_06.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_07.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_07.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_08.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_08.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_09.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_09.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_10.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_10.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_11.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_11.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_12.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_12.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_13.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_13.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_14.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_14.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_15.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_15.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_16.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_16.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_17.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_17.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_18.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_18.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_19.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_19.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_20.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_20.pdf
Normal file
Binary file not shown.
BIN
exam-viewer/public/pdfs/topic_21.pdf
Normal file
BIN
exam-viewer/public/pdfs/topic_21.pdf
Normal file
Binary file not shown.
7085
exam-viewer/public/questions.json
Normal file
7085
exam-viewer/public/questions.json
Normal file
File diff suppressed because one or more lines are too long
8464
exam-viewer/public/questions_translated.json
Normal file
8464
exam-viewer/public/questions_translated.json
Normal file
File diff suppressed because one or more lines are too long
522
exam-viewer/src/App.vue
Normal file
522
exam-viewer/src/App.vue
Normal file
@@ -0,0 +1,522 @@
|
||||
<template>
|
||||
<div class="app-container">
|
||||
<el-container>
|
||||
<el-aside width="220px" class="sidebar">
|
||||
<div class="logo">
|
||||
<h2>MB-330 考试学习</h2>
|
||||
</div>
|
||||
<el-menu
|
||||
:default-active="String(currentTopic)"
|
||||
@select="handleTopicSelect"
|
||||
class="topic-menu"
|
||||
>
|
||||
<el-menu-item
|
||||
v-for="topic in topics"
|
||||
:key="topic"
|
||||
:index="String(topic)"
|
||||
>
|
||||
<span>Topic {{ topic }}</span>
|
||||
<el-badge :value="topicStats[topic] || 0" class="topic-badge" />
|
||||
</el-menu-item>
|
||||
</el-menu>
|
||||
</el-aside>
|
||||
|
||||
<el-main class="main-content">
|
||||
<div v-if="loading" class="loading-container">
|
||||
<el-icon class="is-loading" :size="40"><Loading /></el-icon>
|
||||
<p>加载中...</p>
|
||||
</div>
|
||||
|
||||
<template v-else-if="currentQuestion">
|
||||
<div class="question-header">
|
||||
<h3>Topic {{ currentTopic }} - Question {{ currentQuestion.question_num }}</h3>
|
||||
<div class="header-right">
|
||||
<el-button
|
||||
type="warning"
|
||||
size="small"
|
||||
@click="handleOpenPdf"
|
||||
>
|
||||
<el-icon><Document /></el-icon>
|
||||
查看原PDF
|
||||
</el-button>
|
||||
<div class="jump-control">
|
||||
<span>跳转到第</span>
|
||||
<el-input-number
|
||||
v-model="jumpQuestionNum"
|
||||
:min="1"
|
||||
:max="currentTopicQuestions.length"
|
||||
size="small"
|
||||
controls-position="right"
|
||||
/>
|
||||
<span>题</span>
|
||||
<el-button type="primary" size="small" @click="handleJumpQuestion">
|
||||
跳转
|
||||
</el-button>
|
||||
</div>
|
||||
<span class="question-progress">
|
||||
{{ currentQuestionIndex + 1 }} / {{ currentTopicQuestions.length }}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="question-content">
|
||||
<div class="bilingual-container">
|
||||
<div class="language-panel english-panel">
|
||||
<div class="panel-header">
|
||||
<el-tag type="primary">English</el-tag>
|
||||
</div>
|
||||
<div class="stem-text">
|
||||
{{ currentQuestion.stem }}
|
||||
</div>
|
||||
<div class="options-list">
|
||||
<div
|
||||
v-for="option in currentQuestion.options"
|
||||
:key="option.label"
|
||||
class="option-item"
|
||||
:class="{
|
||||
'correct-option': showAnswer && currentQuestion.answer.includes(option.label)
|
||||
}"
|
||||
>
|
||||
<span class="option-label">{{ option.label }}.</span>
|
||||
<span class="option-text">{{ option.text }}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="language-panel chinese-panel">
|
||||
<div class="panel-header">
|
||||
<el-tag type="success">中文</el-tag>
|
||||
</div>
|
||||
<div class="stem-text">
|
||||
{{ currentQuestion.stem_cn || '待翻译...' }}
|
||||
</div>
|
||||
<div class="options-list">
|
||||
<div
|
||||
v-for="option in currentQuestion.options"
|
||||
:key="option.label"
|
||||
class="option-item"
|
||||
:class="{
|
||||
'correct-option': showAnswer && currentQuestion.answer.includes(option.label)
|
||||
}"
|
||||
>
|
||||
<span class="option-label">{{ option.label }}.</span>
|
||||
<span class="option-text">{{ option.text_cn || '待翻译...' }}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div v-if="showAnswer && currentQuestion.answer" class="answer-section">
|
||||
<el-alert
|
||||
:title="`正确答案: ${currentQuestion.answer}`"
|
||||
type="success"
|
||||
:closable="false"
|
||||
show-icon
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="question-actions">
|
||||
<el-button
|
||||
@click="handlePrevQuestion"
|
||||
:disabled="currentQuestionIndex === 0"
|
||||
>
|
||||
<el-icon><ArrowLeft /></el-icon>
|
||||
上一题
|
||||
</el-button>
|
||||
|
||||
<el-button
|
||||
type="primary"
|
||||
@click="handleToggleAnswer"
|
||||
>
|
||||
{{ showAnswer ? '隐藏答案' : '显示答案' }}
|
||||
</el-button>
|
||||
|
||||
<el-button
|
||||
@click="handleNextQuestion"
|
||||
:disabled="currentQuestionIndex === currentTopicQuestions.length - 1"
|
||||
>
|
||||
下一题
|
||||
<el-icon><ArrowRight /></el-icon>
|
||||
</el-button>
|
||||
</div>
|
||||
</template>
|
||||
</el-main>
|
||||
</el-container>
|
||||
|
||||
<Teleport to="body">
|
||||
<div v-if="pdfDialogVisible" class="pdf-overlay" :class="{ 'is-maximized': isMaximized }">
|
||||
<div class="pdf-modal">
|
||||
<div class="pdf-modal-header">
|
||||
<span class="pdf-modal-title">Topic {{ currentTopic }} - 原文PDF</span>
|
||||
<div class="pdf-modal-actions">
|
||||
<el-button
|
||||
type="primary"
|
||||
size="small"
|
||||
circle
|
||||
@click="toggleMaximize"
|
||||
:title="isMaximized ? '还原' : '最大化'"
|
||||
>
|
||||
<el-icon v-if="isMaximized"><Minus /></el-icon>
|
||||
<el-icon v-else><FullScreen /></el-icon>
|
||||
</el-button>
|
||||
<el-button
|
||||
type="danger"
|
||||
size="small"
|
||||
circle
|
||||
@click="closePdfDialog"
|
||||
title="关闭"
|
||||
>
|
||||
<el-icon><Close /></el-icon>
|
||||
</el-button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="pdf-modal-body">
|
||||
<iframe
|
||||
:src="currentPdfUrl"
|
||||
class="pdf-iframe"
|
||||
frameborder="0"
|
||||
></iframe>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</Teleport>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup lang="ts">
|
||||
import { onMounted, ref, watch, computed } from 'vue'
|
||||
import { useQuestionStore } from './stores/questions'
|
||||
import { storeToRefs } from 'pinia'
|
||||
|
||||
const store = useQuestionStore()
|
||||
|
||||
const {
|
||||
loading,
|
||||
topics,
|
||||
currentTopic,
|
||||
currentQuestionIndex,
|
||||
currentQuestion,
|
||||
currentTopicQuestions,
|
||||
showAnswer,
|
||||
topicStats
|
||||
} = storeToRefs(store)
|
||||
|
||||
const jumpQuestionNum = ref(1)
|
||||
const pdfDialogVisible = ref(false)
|
||||
const isMaximized = ref(false)
|
||||
|
||||
const currentPdfUrl = computed(() => {
|
||||
const topicNum = String(currentTopic.value).padStart(2, '0')
|
||||
return `/pdfs/topic_${topicNum}.pdf`
|
||||
})
|
||||
|
||||
watch(currentQuestionIndex, (newIndex) => {
|
||||
jumpQuestionNum.value = newIndex + 1
|
||||
})
|
||||
|
||||
onMounted(() => {
|
||||
store.loadQuestions()
|
||||
})
|
||||
|
||||
function handleTopicSelect(index: string) {
|
||||
store.setTopic(Number(index))
|
||||
jumpQuestionNum.value = 1
|
||||
}
|
||||
|
||||
function handlePrevQuestion() {
|
||||
store.prevQuestion()
|
||||
}
|
||||
|
||||
function handleNextQuestion() {
|
||||
store.nextQuestion()
|
||||
}
|
||||
|
||||
function handleToggleAnswer() {
|
||||
store.toggleAnswer()
|
||||
}
|
||||
|
||||
function handleJumpQuestion() {
|
||||
const targetIndex = jumpQuestionNum.value - 1
|
||||
if (targetIndex >= 0 && targetIndex < currentTopicQuestions.value.length) {
|
||||
store.jumpToQuestion(targetIndex)
|
||||
}
|
||||
}
|
||||
|
||||
function handleOpenPdf() {
|
||||
pdfDialogVisible.value = true
|
||||
}
|
||||
|
||||
function toggleMaximize() {
|
||||
isMaximized.value = !isMaximized.value
|
||||
}
|
||||
|
||||
function closePdfDialog() {
|
||||
pdfDialogVisible.value = false
|
||||
isMaximized.value = false
|
||||
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.app-container {
|
||||
height: 100vh;
|
||||
background-color: #f5f7fa;
|
||||
}
|
||||
|
||||
.el-container {
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
.sidebar {
|
||||
background-color: #fff;
|
||||
border-right: 1px solid #e4e7ed;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
.logo {
|
||||
padding: 20px;
|
||||
text-align: center;
|
||||
border-bottom: 1px solid #e4e7ed;
|
||||
}
|
||||
|
||||
.logo h2 {
|
||||
margin: 0;
|
||||
color: #409eff;
|
||||
font-size: 18px;
|
||||
}
|
||||
|
||||
.topic-menu {
|
||||
border-right: none;
|
||||
}
|
||||
|
||||
.topic-badge {
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
.main-content {
|
||||
padding: 20px;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
.loading-container {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
.question-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
margin-bottom: 20px;
|
||||
padding-bottom: 15px;
|
||||
border-bottom: 1px solid #e4e7ed;
|
||||
flex-wrap: wrap;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.question-header h3 {
|
||||
margin: 0;
|
||||
color: #303133;
|
||||
}
|
||||
|
||||
.header-right {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 20px;
|
||||
}
|
||||
|
||||
.jump-control {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
font-size: 14px;
|
||||
color: #606266;
|
||||
}
|
||||
|
||||
.jump-control .el-input-number {
|
||||
width: 80px;
|
||||
}
|
||||
|
||||
.question-progress {
|
||||
color: #909399;
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
.question-content {
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
padding: 24px;
|
||||
box-shadow: 0 2px 12px rgba(0, 0, 0, 0.05);
|
||||
}
|
||||
|
||||
.bilingual-container {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr 1fr;
|
||||
gap: 24px;
|
||||
}
|
||||
|
||||
.language-panel {
|
||||
padding: 16px;
|
||||
border-radius: 6px;
|
||||
border: 1px solid #e4e7ed;
|
||||
}
|
||||
|
||||
.english-panel {
|
||||
background-color: #fafafa;
|
||||
}
|
||||
|
||||
.chinese-panel {
|
||||
background-color: #f0f9eb;
|
||||
}
|
||||
|
||||
.panel-header {
|
||||
margin-bottom: 16px;
|
||||
}
|
||||
|
||||
.stem-text {
|
||||
font-size: 15px;
|
||||
line-height: 1.8;
|
||||
color: #303133;
|
||||
margin-bottom: 20px;
|
||||
padding: 12px;
|
||||
background-color: #fff;
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
.options-list {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.option-item {
|
||||
display: flex;
|
||||
padding: 12px 16px;
|
||||
background-color: #fff;
|
||||
border-radius: 4px;
|
||||
border: 1px solid #dcdfe6;
|
||||
transition: all 0.3s;
|
||||
}
|
||||
|
||||
.option-item:hover {
|
||||
border-color: #409eff;
|
||||
background-color: #ecf5ff;
|
||||
}
|
||||
|
||||
.option-item.correct-option {
|
||||
border-color: #67c23a;
|
||||
background-color: #f0f9eb;
|
||||
}
|
||||
|
||||
.option-label {
|
||||
font-weight: bold;
|
||||
color: #409eff;
|
||||
margin-right: 8px;
|
||||
min-width: 24px;
|
||||
}
|
||||
|
||||
.option-text {
|
||||
color: #303133;
|
||||
line-height: 1.6;
|
||||
}
|
||||
|
||||
.answer-section {
|
||||
margin-top: 20px;
|
||||
padding-top: 20px;
|
||||
border-top: 1px solid #e4e7ed;
|
||||
}
|
||||
|
||||
.question-actions {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
gap: 16px;
|
||||
margin-top: 24px;
|
||||
}
|
||||
|
||||
.pdf-overlay {
|
||||
position: fixed;
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
bottom: 0;
|
||||
background-color: rgba(0, 0, 0, 0.5);
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
z-index: 2000;
|
||||
}
|
||||
|
||||
.pdf-modal {
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.15);
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
width: 90%;
|
||||
height: 90vh;
|
||||
transition: all 0.3s ease;
|
||||
}
|
||||
|
||||
.pdf-overlay.is-maximized .pdf-modal {
|
||||
width: 100%;
|
||||
height: 100vh;
|
||||
border-radius: 0;
|
||||
}
|
||||
|
||||
.pdf-modal-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
padding: 12px 16px;
|
||||
background-color: #f5f7fa;
|
||||
border-bottom: 1px solid #e4e7ed;
|
||||
border-radius: 8px 8px 0 0;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.pdf-overlay.is-maximized .pdf-modal-header {
|
||||
border-radius: 0;
|
||||
}
|
||||
|
||||
.pdf-modal-title {
|
||||
font-size: 16px;
|
||||
font-weight: 600;
|
||||
color: #303133;
|
||||
}
|
||||
|
||||
.pdf-modal-actions {
|
||||
display: flex;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.pdf-modal-body {
|
||||
flex: 1;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.pdf-iframe {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
border: none;
|
||||
}
|
||||
|
||||
@media (max-width: 1200px) {
|
||||
.bilingual-container {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.question-header {
|
||||
flex-direction: column;
|
||||
align-items: flex-start;
|
||||
}
|
||||
|
||||
.header-right {
|
||||
width: 100%;
|
||||
justify-content: space-between;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
BIN
exam-viewer/src/assets/hero.png
Normal file
BIN
exam-viewer/src/assets/hero.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 44 KiB |
1
exam-viewer/src/assets/vite.svg
Normal file
1
exam-viewer/src/assets/vite.svg
Normal file
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 8.5 KiB |
1
exam-viewer/src/assets/vue.svg
Normal file
1
exam-viewer/src/assets/vue.svg
Normal file
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="37.07" height="36" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 198"><path fill="#41B883" d="M204.8 0H256L128 220.8L0 0h97.92L128 51.2L157.44 0h47.36Z"></path><path fill="#41B883" d="m0 0l128 220.8L256 0h-51.2L128 132.48L50.56 0H0Z"></path><path fill="#35495E" d="M50.56 0L128 133.12L204.8 0h-47.36L128 51.2L97.92 0H50.56Z"></path></svg>
|
||||
|
After Width: | Height: | Size: 496 B |
93
exam-viewer/src/components/HelloWorld.vue
Normal file
93
exam-viewer/src/components/HelloWorld.vue
Normal file
@@ -0,0 +1,93 @@
|
||||
<script setup lang="ts">
|
||||
import { ref } from 'vue'
|
||||
import viteLogo from '../assets/vite.svg'
|
||||
import heroImg from '../assets/hero.png'
|
||||
import vueLogo from '../assets/vue.svg'
|
||||
|
||||
const count = ref(0)
|
||||
</script>
|
||||
|
||||
<template>
|
||||
<section id="center">
|
||||
<div class="hero">
|
||||
<img :src="heroImg" class="base" width="170" height="179" alt="" />
|
||||
<img :src="vueLogo" class="framework" alt="Vue logo" />
|
||||
<img :src="viteLogo" class="vite" alt="Vite logo" />
|
||||
</div>
|
||||
<div>
|
||||
<h1>Get started</h1>
|
||||
<p>Edit <code>src/App.vue</code> and save to test <code>HMR</code></p>
|
||||
</div>
|
||||
<button class="counter" @click="count++">Count is {{ count }}</button>
|
||||
</section>
|
||||
|
||||
<div class="ticks"></div>
|
||||
|
||||
<section id="next-steps">
|
||||
<div id="docs">
|
||||
<svg class="icon" role="presentation" aria-hidden="true">
|
||||
<use href="/icons.svg#documentation-icon"></use>
|
||||
</svg>
|
||||
<h2>Documentation</h2>
|
||||
<p>Your questions, answered</p>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="https://vite.dev/" target="_blank">
|
||||
<img class="logo" :src="viteLogo" alt="" />
|
||||
Explore Vite
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="https://vuejs.org/" target="_blank">
|
||||
<img class="button-icon" :src="vueLogo" alt="" />
|
||||
Learn more
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="social">
|
||||
<svg class="icon" role="presentation" aria-hidden="true">
|
||||
<use href="/icons.svg#social-icon"></use>
|
||||
</svg>
|
||||
<h2>Connect with us</h2>
|
||||
<p>Join the Vite community</p>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="https://github.com/vitejs/vite" target="_blank">
|
||||
<svg class="button-icon" role="presentation" aria-hidden="true">
|
||||
<use href="/icons.svg#github-icon"></use>
|
||||
</svg>
|
||||
GitHub
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="https://chat.vite.dev/" target="_blank">
|
||||
<svg class="button-icon" role="presentation" aria-hidden="true">
|
||||
<use href="/icons.svg#discord-icon"></use>
|
||||
</svg>
|
||||
Discord
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="https://x.com/vite_js" target="_blank">
|
||||
<svg class="button-icon" role="presentation" aria-hidden="true">
|
||||
<use href="/icons.svg#x-icon"></use>
|
||||
</svg>
|
||||
X.com
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="https://bsky.app/profile/vite.dev" target="_blank">
|
||||
<svg class="button-icon" role="presentation" aria-hidden="true">
|
||||
<use href="/icons.svg#bluesky-icon"></use>
|
||||
</svg>
|
||||
Bluesky
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<div class="ticks"></div>
|
||||
<section id="spacer"></section>
|
||||
</template>
|
||||
18
exam-viewer/src/main.ts
Normal file
18
exam-viewer/src/main.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
import { createApp } from 'vue'
|
||||
import { createPinia } from 'pinia'
|
||||
import ElementPlus from 'element-plus'
|
||||
import 'element-plus/dist/index.css'
|
||||
import * as ElementPlusIconsVue from '@element-plus/icons-vue'
|
||||
import App from './App.vue'
|
||||
import './style.css'
|
||||
|
||||
const app = createApp(App)
|
||||
const pinia = createPinia()
|
||||
|
||||
for (const [key, component] of Object.entries(ElementPlusIconsVue)) {
|
||||
app.component(key, component)
|
||||
}
|
||||
|
||||
app.use(pinia)
|
||||
app.use(ElementPlus)
|
||||
app.mount('#app')
|
||||
111
exam-viewer/src/stores/questions.ts
Normal file
111
exam-viewer/src/stores/questions.ts
Normal file
@@ -0,0 +1,111 @@
|
||||
import { defineStore } from 'pinia'
|
||||
import { ref, computed } from 'vue'
|
||||
|
||||
export interface QuestionOption {
|
||||
label: string
|
||||
text: string
|
||||
text_cn?: string
|
||||
}
|
||||
|
||||
export interface Question {
|
||||
topic: number
|
||||
question_num: number
|
||||
stem: string
|
||||
stem_cn?: string
|
||||
options: QuestionOption[]
|
||||
answer: string
|
||||
}
|
||||
|
||||
export const useQuestionStore = defineStore('questions', () => {
|
||||
const questions = ref<Question[]>([])
|
||||
const currentTopic = ref<number>(1)
|
||||
const currentQuestionIndex = ref<number>(0)
|
||||
const showAnswer = ref<boolean>(false)
|
||||
const loading = ref<boolean>(true)
|
||||
|
||||
const topics = computed(() => {
|
||||
const topicSet = new Set(questions.value.map(q => q.topic))
|
||||
return Array.from(topicSet).sort((a, b) => a - b)
|
||||
})
|
||||
|
||||
const currentTopicQuestions = computed(() => {
|
||||
return questions.value.filter(q => q.topic === currentTopic.value)
|
||||
})
|
||||
|
||||
const currentQuestion = computed(() => {
|
||||
return currentTopicQuestions.value[currentQuestionIndex.value] || null
|
||||
})
|
||||
|
||||
const topicStats = computed(() => {
|
||||
const stats: Record<number, number> = {}
|
||||
questions.value.forEach(q => {
|
||||
stats[q.topic] = (stats[q.topic] || 0) + 1
|
||||
})
|
||||
return stats
|
||||
})
|
||||
|
||||
async function loadQuestions() {
|
||||
try {
|
||||
loading.value = true
|
||||
let response = await fetch('/questions_translated.json')
|
||||
if (!response.ok) {
|
||||
response = await fetch('/questions.json')
|
||||
}
|
||||
const data = await response.json()
|
||||
questions.value = data
|
||||
} catch (error) {
|
||||
console.error('Failed to load questions:', error)
|
||||
} finally {
|
||||
loading.value = false
|
||||
}
|
||||
}
|
||||
|
||||
function setTopic(topic: number) {
|
||||
currentTopic.value = topic
|
||||
currentQuestionIndex.value = 0
|
||||
showAnswer.value = false
|
||||
}
|
||||
|
||||
function nextQuestion() {
|
||||
if (currentQuestionIndex.value < currentTopicQuestions.value.length - 1) {
|
||||
currentQuestionIndex.value++
|
||||
showAnswer.value = false
|
||||
}
|
||||
}
|
||||
|
||||
function prevQuestion() {
|
||||
if (currentQuestionIndex.value > 0) {
|
||||
currentQuestionIndex.value--
|
||||
showAnswer.value = false
|
||||
}
|
||||
}
|
||||
|
||||
function jumpToQuestion(index: number) {
|
||||
if (index >= 0 && index < currentTopicQuestions.value.length) {
|
||||
currentQuestionIndex.value = index
|
||||
showAnswer.value = false
|
||||
}
|
||||
}
|
||||
|
||||
function toggleAnswer() {
|
||||
showAnswer.value = !showAnswer.value
|
||||
}
|
||||
|
||||
return {
|
||||
questions,
|
||||
currentTopic,
|
||||
currentQuestionIndex,
|
||||
showAnswer,
|
||||
loading,
|
||||
topics,
|
||||
currentTopicQuestions,
|
||||
currentQuestion,
|
||||
topicStats,
|
||||
loadQuestions,
|
||||
setTopic,
|
||||
nextQuestion,
|
||||
prevQuestion,
|
||||
jumpToQuestion,
|
||||
toggleAnswer
|
||||
}
|
||||
})
|
||||
34
exam-viewer/src/style.css
Normal file
34
exam-viewer/src/style.css
Normal file
@@ -0,0 +1,34 @@
|
||||
* {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
html, body {
|
||||
height: 100%;
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Fira Sans', 'Droid Sans', 'Helvetica Neue', sans-serif;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
-moz-osx-font-smoothing: grayscale;
|
||||
}
|
||||
|
||||
#app {
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
::-webkit-scrollbar {
|
||||
width: 6px;
|
||||
height: 6px;
|
||||
}
|
||||
|
||||
::-webkit-scrollbar-track {
|
||||
background: #f1f1f1;
|
||||
}
|
||||
|
||||
::-webkit-scrollbar-thumb {
|
||||
background: #c1c1c1;
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
::-webkit-scrollbar-thumb:hover {
|
||||
background: #a8a8a8;
|
||||
}
|
||||
16
exam-viewer/tsconfig.app.json
Normal file
16
exam-viewer/tsconfig.app.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"extends": "@vue/tsconfig/tsconfig.dom.json",
|
||||
"compilerOptions": {
|
||||
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
|
||||
"types": ["vite/client"],
|
||||
|
||||
/* Linting */
|
||||
"strict": true,
|
||||
"noUnusedLocals": true,
|
||||
"noUnusedParameters": true,
|
||||
"erasableSyntaxOnly": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
"noUncheckedSideEffectImports": true
|
||||
},
|
||||
"include": ["src/**/*.ts", "src/**/*.tsx", "src/**/*.vue"]
|
||||
}
|
||||
7
exam-viewer/tsconfig.json
Normal file
7
exam-viewer/tsconfig.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"files": [],
|
||||
"references": [
|
||||
{ "path": "./tsconfig.app.json" },
|
||||
{ "path": "./tsconfig.node.json" }
|
||||
]
|
||||
}
|
||||
26
exam-viewer/tsconfig.node.json
Normal file
26
exam-viewer/tsconfig.node.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
|
||||
"target": "ES2023",
|
||||
"lib": ["ES2023"],
|
||||
"module": "ESNext",
|
||||
"types": ["node"],
|
||||
"skipLibCheck": true,
|
||||
|
||||
/* Bundler mode */
|
||||
"moduleResolution": "bundler",
|
||||
"allowImportingTsExtensions": true,
|
||||
"verbatimModuleSyntax": true,
|
||||
"moduleDetection": "force",
|
||||
"noEmit": true,
|
||||
|
||||
/* Linting */
|
||||
"strict": true,
|
||||
"noUnusedLocals": true,
|
||||
"noUnusedParameters": true,
|
||||
"erasableSyntaxOnly": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
"noUncheckedSideEffectImports": true
|
||||
},
|
||||
"include": ["vite.config.ts"]
|
||||
}
|
||||
7
exam-viewer/vite.config.ts
Normal file
7
exam-viewer/vite.config.ts
Normal file
@@ -0,0 +1,7 @@
|
||||
import { defineConfig } from 'vite'
|
||||
import vue from '@vitejs/plugin-vue'
|
||||
|
||||
// https://vite.dev/config/
|
||||
export default defineConfig({
|
||||
plugins: [vue()],
|
||||
})
|
||||
BIN
exam_data/pdfs/topic_01.pdf
Normal file
BIN
exam_data/pdfs/topic_01.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_02.pdf
Normal file
BIN
exam_data/pdfs/topic_02.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_03.pdf
Normal file
BIN
exam_data/pdfs/topic_03.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_04.pdf
Normal file
BIN
exam_data/pdfs/topic_04.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_05.pdf
Normal file
BIN
exam_data/pdfs/topic_05.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_06.pdf
Normal file
BIN
exam_data/pdfs/topic_06.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_07.pdf
Normal file
BIN
exam_data/pdfs/topic_07.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_08.pdf
Normal file
BIN
exam_data/pdfs/topic_08.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_09.pdf
Normal file
BIN
exam_data/pdfs/topic_09.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_10.pdf
Normal file
BIN
exam_data/pdfs/topic_10.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_11.pdf
Normal file
BIN
exam_data/pdfs/topic_11.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_12.pdf
Normal file
BIN
exam_data/pdfs/topic_12.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_13.pdf
Normal file
BIN
exam_data/pdfs/topic_13.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_14.pdf
Normal file
BIN
exam_data/pdfs/topic_14.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_15.pdf
Normal file
BIN
exam_data/pdfs/topic_15.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_16.pdf
Normal file
BIN
exam_data/pdfs/topic_16.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_17.pdf
Normal file
BIN
exam_data/pdfs/topic_17.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_18.pdf
Normal file
BIN
exam_data/pdfs/topic_18.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_19.pdf
Normal file
BIN
exam_data/pdfs/topic_19.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_20.pdf
Normal file
BIN
exam_data/pdfs/topic_20.pdf
Normal file
Binary file not shown.
BIN
exam_data/pdfs/topic_21.pdf
Normal file
BIN
exam_data/pdfs/topic_21.pdf
Normal file
Binary file not shown.
7085
exam_data/questions.json
Normal file
7085
exam_data/questions.json
Normal file
File diff suppressed because one or more lines are too long
8464
exam_data/questions_translated.json
Normal file
8464
exam_data/questions_translated.json
Normal file
File diff suppressed because one or more lines are too long
165
extract_questions_final.py
Normal file
165
extract_questions_final.py
Normal file
@@ -0,0 +1,165 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
精确提取PDF题目内容 - 最终版
|
||||
"""
|
||||
import re
|
||||
import json
|
||||
import os
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
|
||||
def split_pdf_and_extract_questions(pdf_path, topics_info_path, output_dir):
|
||||
"""
|
||||
按Topic切割PDF并精确提取题目内容
|
||||
"""
|
||||
with open(topics_info_path, 'r', encoding='utf-8') as f:
|
||||
topics = json.load(f)
|
||||
|
||||
reader = PdfReader(pdf_path)
|
||||
total_pages = len(reader.pages)
|
||||
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
pdf_dir = os.path.join(output_dir, 'pdfs')
|
||||
os.makedirs(pdf_dir, exist_ok=True)
|
||||
|
||||
all_questions = []
|
||||
|
||||
for topic in topics:
|
||||
topic_num = topic['topic_num']
|
||||
start_page = topic['start_page']
|
||||
end_page = topic['end_page']
|
||||
|
||||
writer = PdfWriter()
|
||||
for page_num in range(start_page, min(end_page + 1, total_pages)):
|
||||
writer.add_page(reader.pages[page_num])
|
||||
|
||||
pdf_output_path = os.path.join(pdf_dir, f'topic_{topic_num:02d}.pdf')
|
||||
with open(pdf_output_path, 'wb') as f:
|
||||
writer.write(f)
|
||||
print(f"已保存: {pdf_output_path}")
|
||||
|
||||
print(f"正在提取 Topic {topic_num} 的题目内容...")
|
||||
topic_questions = extract_questions_precise(reader, start_page, end_page, topic_num)
|
||||
all_questions.extend(topic_questions)
|
||||
print(f" Topic {topic_num}: 提取了 {len(topic_questions)} 道题")
|
||||
|
||||
questions_json_path = os.path.join(output_dir, 'questions.json')
|
||||
with open(questions_json_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(all_questions, f, ensure_ascii=False, indent=2)
|
||||
print(f"\n所有题目已保存到: {questions_json_path}")
|
||||
print(f"总共提取了 {len(all_questions)} 道题")
|
||||
|
||||
return all_questions
|
||||
|
||||
def extract_questions_precise(reader, start_page, end_page, topic_num):
|
||||
"""
|
||||
精确提取题目内容
|
||||
"""
|
||||
questions = []
|
||||
|
||||
full_text = ""
|
||||
for page_num in range(start_page, end_page + 1):
|
||||
page = reader.pages[page_num]
|
||||
text = page.extract_text()
|
||||
if text:
|
||||
full_text += text + "\n"
|
||||
|
||||
question_pattern = re.compile(
|
||||
r'Question\s+#(\d+)\s*\n(.*?)(?=Question\s+#\d+|Topic\s+\d+|$)',
|
||||
re.DOTALL | re.IGNORECASE
|
||||
)
|
||||
|
||||
matches = question_pattern.findall(full_text)
|
||||
|
||||
for match in matches:
|
||||
q_num = int(match[0])
|
||||
content = match[1].strip()
|
||||
|
||||
question_data = parse_question_content(topic_num, q_num, content)
|
||||
if question_data:
|
||||
questions.append(question_data)
|
||||
|
||||
return questions
|
||||
|
||||
def parse_question_content(topic_num, q_num, content):
|
||||
"""
|
||||
解析题目内容,提取题干、选项和答案
|
||||
"""
|
||||
lines = content.split('\n')
|
||||
|
||||
question_stem = ""
|
||||
options = []
|
||||
correct_answer = ""
|
||||
|
||||
option_pattern = re.compile(r'^([A-Z])\.\s*(.*)', re.IGNORECASE)
|
||||
answer_inline_pattern = re.compile(r'\s+([A-Z]{1,4})\s*$')
|
||||
answer_pattern = re.compile(r'Correct Answer:\s*([A-Z,\s]+)', re.IGNORECASE)
|
||||
comments_pattern = re.compile(r'^Comments', re.IGNORECASE)
|
||||
|
||||
current_section = "stem"
|
||||
current_option = None
|
||||
current_option_text = ""
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
if comments_pattern.match(line):
|
||||
break
|
||||
|
||||
answer_match = answer_pattern.search(line)
|
||||
if answer_match:
|
||||
correct_answer = answer_match.group(1).strip().upper()
|
||||
line = answer_pattern.sub('', line).strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
option_match = option_pattern.match(line)
|
||||
if option_match:
|
||||
if current_option is not None and current_option_text:
|
||||
options.append({
|
||||
'label': current_option,
|
||||
'text': current_option_text.strip()
|
||||
})
|
||||
current_option = option_match.group(1).upper()
|
||||
current_option_text = option_match.group(2)
|
||||
current_section = "options"
|
||||
elif current_section == "options" and current_option is not None:
|
||||
if not line.startswith(('Most Voted', 'upvoted', 'Selected Answer:', 'Community vote', 'Correct Answer')):
|
||||
current_option_text += " " + line
|
||||
elif current_section == "stem":
|
||||
if not line.startswith(('Most Voted', 'upvoted', 'Selected Answer:', 'Community vote', 'Correct Answer')):
|
||||
question_stem += " " + line
|
||||
|
||||
if current_option is not None and current_option_text:
|
||||
inline_answer = answer_inline_pattern.search(current_option_text)
|
||||
if inline_answer and not correct_answer:
|
||||
possible_answer = inline_answer.group(1)
|
||||
if all(c in 'ABCD' for c in possible_answer):
|
||||
correct_answer = possible_answer
|
||||
current_option_text = answer_inline_pattern.sub('', current_option_text)
|
||||
|
||||
options.append({
|
||||
'label': current_option,
|
||||
'text': current_option_text.strip()
|
||||
})
|
||||
|
||||
question_stem = question_stem.strip()
|
||||
|
||||
if not question_stem and not options:
|
||||
return None
|
||||
|
||||
return {
|
||||
'topic': topic_num,
|
||||
'question_num': q_num,
|
||||
'stem': question_stem,
|
||||
'options': options,
|
||||
'answer': correct_answer
|
||||
}
|
||||
|
||||
if __name__ == '__main__':
|
||||
pdf_path = '/Users/duguoyou/D365/MB-330_with_discussion.pdf'
|
||||
topics_info_path = '/Users/duguoyou/D365/topics_info.json'
|
||||
output_dir = '/Users/duguoyou/D365/exam_data'
|
||||
|
||||
questions = split_pdf_and_extract_questions(pdf_path, topics_info_path, output_dir)
|
||||
155
extract_questions_v2.py
Normal file
155
extract_questions_v2.py
Normal file
@@ -0,0 +1,155 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
精确提取PDF题目内容 - 改进版
|
||||
"""
|
||||
import re
|
||||
import json
|
||||
import os
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
|
||||
def split_pdf_and_extract_questions(pdf_path, topics_info_path, output_dir):
|
||||
"""
|
||||
按Topic切割PDF并精确提取题目内容
|
||||
"""
|
||||
with open(topics_info_path, 'r', encoding='utf-8') as f:
|
||||
topics = json.load(f)
|
||||
|
||||
reader = PdfReader(pdf_path)
|
||||
total_pages = len(reader.pages)
|
||||
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
pdf_dir = os.path.join(output_dir, 'pdfs')
|
||||
os.makedirs(pdf_dir, exist_ok=True)
|
||||
|
||||
all_questions = []
|
||||
|
||||
for topic in topics:
|
||||
topic_num = topic['topic_num']
|
||||
start_page = topic['start_page']
|
||||
end_page = topic['end_page']
|
||||
|
||||
writer = PdfWriter()
|
||||
for page_num in range(start_page, min(end_page + 1, total_pages)):
|
||||
writer.add_page(reader.pages[page_num])
|
||||
|
||||
pdf_output_path = os.path.join(pdf_dir, f'topic_{topic_num:02d}.pdf')
|
||||
with open(pdf_output_path, 'wb') as f:
|
||||
writer.write(f)
|
||||
print(f"已保存: {pdf_output_path}")
|
||||
|
||||
print(f"正在提取 Topic {topic_num} 的题目内容...")
|
||||
topic_questions = extract_questions_precise(reader, start_page, end_page, topic_num)
|
||||
all_questions.extend(topic_questions)
|
||||
print(f" Topic {topic_num}: 提取了 {len(topic_questions)} 道题")
|
||||
|
||||
questions_json_path = os.path.join(output_dir, 'questions.json')
|
||||
with open(questions_json_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(all_questions, f, ensure_ascii=False, indent=2)
|
||||
print(f"\n所有题目已保存到: {questions_json_path}")
|
||||
print(f"总共提取了 {len(all_questions)} 道题")
|
||||
|
||||
return all_questions
|
||||
|
||||
def extract_questions_precise(reader, start_page, end_page, topic_num):
|
||||
"""
|
||||
精确提取题目内容
|
||||
"""
|
||||
questions = []
|
||||
|
||||
full_text = ""
|
||||
for page_num in range(start_page, end_page + 1):
|
||||
page = reader.pages[page_num]
|
||||
text = page.extract_text()
|
||||
if text:
|
||||
full_text += text + "\n"
|
||||
|
||||
question_pattern = re.compile(
|
||||
r'Question\s+#(\d+)\s*\n(.*?)(?=Question\s+#\d+|Topic\s+\d+|$)',
|
||||
re.DOTALL | re.IGNORECASE
|
||||
)
|
||||
|
||||
matches = question_pattern.findall(full_text)
|
||||
|
||||
for match in matches:
|
||||
q_num = int(match[0])
|
||||
content = match[1].strip()
|
||||
|
||||
question_data = parse_question_content(topic_num, q_num, content)
|
||||
if question_data:
|
||||
questions.append(question_data)
|
||||
|
||||
return questions
|
||||
|
||||
def parse_question_content(topic_num, q_num, content):
|
||||
"""
|
||||
解析题目内容,提取题干、选项和答案
|
||||
"""
|
||||
lines = content.split('\n')
|
||||
|
||||
question_stem = ""
|
||||
options = []
|
||||
correct_answer = ""
|
||||
|
||||
option_pattern = re.compile(r'^([A-Z])\.\s*(.*)', re.IGNORECASE)
|
||||
answer_pattern = re.compile(r'Correct Answer:\s*([A-Z,\s]+)', re.IGNORECASE)
|
||||
comments_pattern = re.compile(r'^Comments', re.IGNORECASE)
|
||||
|
||||
current_section = "stem"
|
||||
current_option = None
|
||||
current_option_text = ""
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
if comments_pattern.match(line):
|
||||
break
|
||||
|
||||
answer_match = answer_pattern.search(line)
|
||||
if answer_match:
|
||||
correct_answer = answer_match.group(1).strip().upper()
|
||||
continue
|
||||
|
||||
option_match = option_pattern.match(line)
|
||||
if option_match:
|
||||
if current_option is not None and current_option_text:
|
||||
options.append({
|
||||
'label': current_option,
|
||||
'text': current_option_text.strip()
|
||||
})
|
||||
current_option = option_match.group(1).upper()
|
||||
current_option_text = option_match.group(2)
|
||||
current_section = "options"
|
||||
elif current_section == "options" and current_option is not None:
|
||||
if not line.startswith(('Most Voted', 'upvoted', 'Selected Answer:', 'Community vote')):
|
||||
current_option_text += " " + line
|
||||
elif current_section == "stem":
|
||||
if not line.startswith(('Most Voted', 'upvoted', 'Selected Answer:', 'Community vote', 'Correct Answer')):
|
||||
question_stem += " " + line
|
||||
|
||||
if current_option is not None and current_option_text:
|
||||
options.append({
|
||||
'label': current_option,
|
||||
'text': current_option_text.strip()
|
||||
})
|
||||
|
||||
question_stem = question_stem.strip()
|
||||
|
||||
if not question_stem and not options:
|
||||
return None
|
||||
|
||||
return {
|
||||
'topic': topic_num,
|
||||
'question_num': q_num,
|
||||
'stem': question_stem,
|
||||
'options': options,
|
||||
'answer': correct_answer
|
||||
}
|
||||
|
||||
if __name__ == '__main__':
|
||||
pdf_path = '/Users/duguoyou/D365/MB-330_with_discussion.pdf'
|
||||
topics_info_path = '/Users/duguoyou/D365/topics_info.json'
|
||||
output_dir = '/Users/duguoyou/D365/exam_data'
|
||||
|
||||
questions = split_pdf_and_extract_questions(pdf_path, topics_info_path, output_dir)
|
||||
157
extract_questions_v3.py
Normal file
157
extract_questions_v3.py
Normal file
@@ -0,0 +1,157 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
精确提取PDF题目内容 - 最终版
|
||||
"""
|
||||
import re
|
||||
import json
|
||||
import os
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
|
||||
def split_pdf_and_extract_questions(pdf_path, topics_info_path, output_dir):
|
||||
"""
|
||||
按Topic切割PDF并精确提取题目内容
|
||||
"""
|
||||
with open(topics_info_path, 'r', encoding='utf-8') as f:
|
||||
topics = json.load(f)
|
||||
|
||||
reader = PdfReader(pdf_path)
|
||||
total_pages = len(reader.pages)
|
||||
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
pdf_dir = os.path.join(output_dir, 'pdfs')
|
||||
os.makedirs(pdf_dir, exist_ok=True)
|
||||
|
||||
all_questions = []
|
||||
|
||||
for topic in topics:
|
||||
topic_num = topic['topic_num']
|
||||
start_page = topic['start_page']
|
||||
end_page = topic['end_page']
|
||||
|
||||
writer = PdfWriter()
|
||||
for page_num in range(start_page, min(end_page + 1, total_pages)):
|
||||
writer.add_page(reader.pages[page_num])
|
||||
|
||||
pdf_output_path = os.path.join(pdf_dir, f'topic_{topic_num:02d}.pdf')
|
||||
with open(pdf_output_path, 'wb') as f:
|
||||
writer.write(f)
|
||||
print(f"已保存: {pdf_output_path}")
|
||||
|
||||
print(f"正在提取 Topic {topic_num} 的题目内容...")
|
||||
topic_questions = extract_questions_precise(reader, start_page, end_page, topic_num)
|
||||
all_questions.extend(topic_questions)
|
||||
print(f" Topic {topic_num}: 提取了 {len(topic_questions)} 道题")
|
||||
|
||||
questions_json_path = os.path.join(output_dir, 'questions.json')
|
||||
with open(questions_json_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(all_questions, f, ensure_ascii=False, indent=2)
|
||||
print(f"\n所有题目已保存到: {questions_json_path}")
|
||||
print(f"总共提取了 {len(all_questions)} 道题")
|
||||
|
||||
return all_questions
|
||||
|
||||
def extract_questions_precise(reader, start_page, end_page, topic_num):
|
||||
"""
|
||||
精确提取题目内容
|
||||
"""
|
||||
questions = []
|
||||
|
||||
full_text = ""
|
||||
for page_num in range(start_page, end_page + 1):
|
||||
page = reader.pages[page_num]
|
||||
text = page.extract_text()
|
||||
if text:
|
||||
full_text += text + "\n"
|
||||
|
||||
question_pattern = re.compile(
|
||||
r'Question\s+#(\d+)\s*\n(.*?)(?=Question\s+#\d+|Topic\s+\d+|$)',
|
||||
re.DOTALL | re.IGNORECASE
|
||||
)
|
||||
|
||||
matches = question_pattern.findall(full_text)
|
||||
|
||||
for match in matches:
|
||||
q_num = int(match[0])
|
||||
content = match[1].strip()
|
||||
|
||||
question_data = parse_question_content(topic_num, q_num, content)
|
||||
if question_data:
|
||||
questions.append(question_data)
|
||||
|
||||
return questions
|
||||
|
||||
def parse_question_content(topic_num, q_num, content):
|
||||
"""
|
||||
解析题目内容,提取题干、选项和答案
|
||||
"""
|
||||
lines = content.split('\n')
|
||||
|
||||
question_stem = ""
|
||||
options = []
|
||||
correct_answer = ""
|
||||
|
||||
option_pattern = re.compile(r'^([A-Z])\.\s*(.*)', re.IGNORECASE)
|
||||
answer_pattern = re.compile(r'Correct Answer:\s*([A-Z,\s]+)', re.IGNORECASE)
|
||||
comments_pattern = re.compile(r'^Comments', re.IGNORECASE)
|
||||
|
||||
current_section = "stem"
|
||||
current_option = None
|
||||
current_option_text = ""
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
if comments_pattern.match(line):
|
||||
break
|
||||
|
||||
answer_match = answer_pattern.search(line)
|
||||
if answer_match:
|
||||
correct_answer = answer_match.group(1).strip().upper()
|
||||
line = answer_pattern.sub('', line).strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
option_match = option_pattern.match(line)
|
||||
if option_match:
|
||||
if current_option is not None and current_option_text:
|
||||
options.append({
|
||||
'label': current_option,
|
||||
'text': current_option_text.strip()
|
||||
})
|
||||
current_option = option_match.group(1).upper()
|
||||
current_option_text = option_match.group(2)
|
||||
current_section = "options"
|
||||
elif current_section == "options" and current_option is not None:
|
||||
if not line.startswith(('Most Voted', 'upvoted', 'Selected Answer:', 'Community vote', 'Correct Answer')):
|
||||
current_option_text += " " + line
|
||||
elif current_section == "stem":
|
||||
if not line.startswith(('Most Voted', 'upvoted', 'Selected Answer:', 'Community vote', 'Correct Answer')):
|
||||
question_stem += " " + line
|
||||
|
||||
if current_option is not None and current_option_text:
|
||||
options.append({
|
||||
'label': current_option,
|
||||
'text': current_option_text.strip()
|
||||
})
|
||||
|
||||
question_stem = question_stem.strip()
|
||||
|
||||
if not question_stem and not options:
|
||||
return None
|
||||
|
||||
return {
|
||||
'topic': topic_num,
|
||||
'question_num': q_num,
|
||||
'stem': question_stem,
|
||||
'options': options,
|
||||
'answer': correct_answer
|
||||
}
|
||||
|
||||
if __name__ == '__main__':
|
||||
pdf_path = '/Users/duguoyou/D365/MB-330_with_discussion.pdf'
|
||||
topics_info_path = '/Users/duguoyou/D365/topics_info.json'
|
||||
output_dir = '/Users/duguoyou/D365/exam_data'
|
||||
|
||||
questions = split_pdf_and_extract_questions(pdf_path, topics_info_path, output_dir)
|
||||
108
split_pdf.py
Normal file
108
split_pdf.py
Normal file
@@ -0,0 +1,108 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
切割PDF文件并提取题目内容
|
||||
"""
|
||||
import re
|
||||
import json
|
||||
import os
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
|
||||
def split_pdf_and_extract_questions(pdf_path, topics_info_path, output_dir):
|
||||
"""
|
||||
按Topic切割PDF并提取题目内容
|
||||
"""
|
||||
with open(topics_info_path, 'r', encoding='utf-8') as f:
|
||||
topics = json.load(f)
|
||||
|
||||
reader = PdfReader(pdf_path)
|
||||
total_pages = len(reader.pages)
|
||||
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
pdf_dir = os.path.join(output_dir, 'pdfs')
|
||||
os.makedirs(pdf_dir, exist_ok=True)
|
||||
|
||||
all_questions = []
|
||||
|
||||
for topic in topics:
|
||||
topic_num = topic['topic_num']
|
||||
start_page = topic['start_page']
|
||||
end_page = topic['end_page']
|
||||
|
||||
writer = PdfWriter()
|
||||
for page_num in range(start_page, min(end_page + 1, total_pages)):
|
||||
writer.add_page(reader.pages[page_num])
|
||||
|
||||
pdf_output_path = os.path.join(pdf_dir, f'topic_{topic_num:02d}.pdf')
|
||||
with open(pdf_output_path, 'wb') as f:
|
||||
writer.write(f)
|
||||
print(f"已保存: {pdf_output_path}")
|
||||
|
||||
print(f"正在提取 Topic {topic_num} 的题目内容...")
|
||||
topic_questions = extract_questions_from_pages(reader, start_page, end_page, topic_num)
|
||||
all_questions.extend(topic_questions)
|
||||
print(f" Topic {topic_num}: 提取了 {len(topic_questions)} 道题")
|
||||
|
||||
questions_json_path = os.path.join(output_dir, 'questions.json')
|
||||
with open(questions_json_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(all_questions, f, ensure_ascii=False, indent=2)
|
||||
print(f"\n所有题目已保存到: {questions_json_path}")
|
||||
print(f"总共提取了 {len(all_questions)} 道题")
|
||||
|
||||
return all_questions
|
||||
|
||||
def extract_questions_from_pages(reader, start_page, end_page, topic_num):
    """Extract question records from a page range of an open PDF.

    Scans each page's text for 'Question #N' headers and accumulates the
    option lines (A.–D.), the 'Correct Answer:' line and any discussion
    text following a 'Comments' marker into one dict per question.

    Parameters:
        reader: an open pypdf PdfReader (any object exposing a .pages
            sequence of pages with an extract_text() method).
        start_page / end_page: inclusive 0-based page range.
        topic_num: topic number stamped on every extracted question.

    Returns:
        List of dicts with keys: topic, question_num, content, options,
        answer, explanation.
    """
    questions = []
    current_question = None
    question_pattern = re.compile(r'Question\s+#(\d+)', re.IGNORECASE)

    # BUG FIX: clamp so a too-large end_page (e.g. stale topic metadata)
    # cannot raise IndexError on reader.pages.
    last_page = min(end_page, len(reader.pages) - 1)

    for page_num in range(start_page, last_page + 1):
        text = reader.pages[page_num].extract_text()
        if not text:
            continue

        for line in text.split('\n'):
            line = line.strip()
            if not line:
                continue

            q_match = question_pattern.search(line)
            if q_match:
                # A new question header closes the previous question.
                if current_question:
                    questions.append(current_question)
                current_question = {
                    'topic': topic_num,
                    'question_num': int(q_match.group(1)),
                    'content': line,
                    'options': [],
                    'answer': None,
                    'explanation': None
                }
            elif current_question:
                # startswith accepts a tuple: one check instead of four.
                if line.startswith(('A.', 'B.', 'C.', 'D.')):
                    current_question['options'].append(line)
                elif line.startswith('Correct Answer:'):
                    current_question['answer'] = line.replace('Correct Answer:', '').strip()
                elif line.startswith('Comments'):
                    # Everything after the Comments marker is discussion text.
                    current_question['explanation'] = ''
                elif current_question.get('explanation') is not None:
                    current_question['explanation'] += ' ' + line

    # Flush the final in-progress question.
    if current_question:
        questions.append(current_question)

    return questions
|
||||
|
||||
if __name__ == '__main__':
    # Entry point: split the exam PDF by topic and extract every question.
    questions = split_pdf_and_extract_questions(
        '/Users/duguoyou/D365/MB-330_with_discussion.pdf',
        '/Users/duguoyou/D365/topics_info.json',
        '/Users/duguoyou/D365/exam_data',
    )
|
||||
610
topics_info.json
Normal file
610
topics_info.json
Normal file
@@ -0,0 +1,610 @@
|
||||
[
|
||||
{
|
||||
"topic_num": 1,
|
||||
"start_page": 0,
|
||||
"end_page": 71,
|
||||
"question_count": 36,
|
||||
"questions": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22,
|
||||
23,
|
||||
24,
|
||||
25,
|
||||
26,
|
||||
27,
|
||||
28,
|
||||
29,
|
||||
30,
|
||||
31,
|
||||
32,
|
||||
33,
|
||||
34,
|
||||
35,
|
||||
36
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 2,
|
||||
"start_page": 73,
|
||||
"end_page": 233,
|
||||
"question_count": 64,
|
||||
"questions": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22,
|
||||
23,
|
||||
24,
|
||||
25,
|
||||
26,
|
||||
27,
|
||||
28,
|
||||
29,
|
||||
30,
|
||||
31,
|
||||
32,
|
||||
33,
|
||||
34,
|
||||
35,
|
||||
36,
|
||||
37,
|
||||
38,
|
||||
39,
|
||||
40,
|
||||
41,
|
||||
42,
|
||||
43,
|
||||
44,
|
||||
45,
|
||||
46,
|
||||
47,
|
||||
48,
|
||||
49,
|
||||
50,
|
||||
51,
|
||||
52,
|
||||
53,
|
||||
54,
|
||||
55,
|
||||
56,
|
||||
57,
|
||||
58,
|
||||
59,
|
||||
60,
|
||||
61,
|
||||
62,
|
||||
63,
|
||||
64
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 3,
|
||||
"start_page": 235,
|
||||
"end_page": 456,
|
||||
"question_count": 99,
|
||||
"questions": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22,
|
||||
23,
|
||||
24,
|
||||
25,
|
||||
26,
|
||||
27,
|
||||
28,
|
||||
29,
|
||||
30,
|
||||
31,
|
||||
32,
|
||||
33,
|
||||
34,
|
||||
35,
|
||||
36,
|
||||
37,
|
||||
38,
|
||||
39,
|
||||
40,
|
||||
41,
|
||||
42,
|
||||
43,
|
||||
44,
|
||||
45,
|
||||
46,
|
||||
47,
|
||||
48,
|
||||
49,
|
||||
50,
|
||||
51,
|
||||
52,
|
||||
53,
|
||||
54,
|
||||
55,
|
||||
56,
|
||||
57,
|
||||
58,
|
||||
59,
|
||||
60,
|
||||
61,
|
||||
62,
|
||||
63,
|
||||
64,
|
||||
65,
|
||||
66,
|
||||
67,
|
||||
68,
|
||||
69,
|
||||
70,
|
||||
71,
|
||||
72,
|
||||
73,
|
||||
74,
|
||||
75,
|
||||
76,
|
||||
77,
|
||||
78,
|
||||
79,
|
||||
80,
|
||||
81,
|
||||
82,
|
||||
83,
|
||||
84,
|
||||
85,
|
||||
86,
|
||||
87,
|
||||
88,
|
||||
89,
|
||||
90,
|
||||
91,
|
||||
92,
|
||||
93,
|
||||
94,
|
||||
95,
|
||||
96,
|
||||
97,
|
||||
98,
|
||||
99
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 4,
|
||||
"start_page": 458,
|
||||
"end_page": 637,
|
||||
"question_count": 77,
|
||||
"questions": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22,
|
||||
23,
|
||||
24,
|
||||
25,
|
||||
26,
|
||||
27,
|
||||
28,
|
||||
29,
|
||||
30,
|
||||
31,
|
||||
32,
|
||||
33,
|
||||
34,
|
||||
35,
|
||||
36,
|
||||
37,
|
||||
38,
|
||||
39,
|
||||
40,
|
||||
41,
|
||||
42,
|
||||
43,
|
||||
44,
|
||||
45,
|
||||
46,
|
||||
47,
|
||||
48,
|
||||
49,
|
||||
50,
|
||||
51,
|
||||
52,
|
||||
53,
|
||||
54,
|
||||
55,
|
||||
56,
|
||||
57,
|
||||
58,
|
||||
59,
|
||||
60,
|
||||
61,
|
||||
62,
|
||||
63,
|
||||
64,
|
||||
65,
|
||||
66,
|
||||
67,
|
||||
68,
|
||||
69,
|
||||
70,
|
||||
71,
|
||||
72,
|
||||
73,
|
||||
74,
|
||||
75,
|
||||
76,
|
||||
77
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 5,
|
||||
"start_page": 639,
|
||||
"end_page": 779,
|
||||
"question_count": 63,
|
||||
"questions": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22,
|
||||
23,
|
||||
24,
|
||||
25,
|
||||
26,
|
||||
27,
|
||||
28,
|
||||
29,
|
||||
30,
|
||||
31,
|
||||
32,
|
||||
33,
|
||||
34,
|
||||
35,
|
||||
36,
|
||||
37,
|
||||
38,
|
||||
39,
|
||||
40,
|
||||
41,
|
||||
42,
|
||||
43,
|
||||
44,
|
||||
45,
|
||||
46,
|
||||
47,
|
||||
48,
|
||||
49,
|
||||
50,
|
||||
51,
|
||||
52,
|
||||
53,
|
||||
54,
|
||||
55,
|
||||
56,
|
||||
57,
|
||||
58,
|
||||
59,
|
||||
60,
|
||||
61,
|
||||
62,
|
||||
63
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 6,
|
||||
"start_page": 781,
|
||||
"end_page": 911,
|
||||
"question_count": 53,
|
||||
"questions": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22,
|
||||
23,
|
||||
24,
|
||||
25,
|
||||
26,
|
||||
27,
|
||||
28,
|
||||
29,
|
||||
30,
|
||||
31,
|
||||
32,
|
||||
33,
|
||||
34,
|
||||
35,
|
||||
36,
|
||||
37,
|
||||
38,
|
||||
39,
|
||||
40,
|
||||
41,
|
||||
42,
|
||||
43,
|
||||
44,
|
||||
45,
|
||||
46,
|
||||
47,
|
||||
48,
|
||||
49,
|
||||
50,
|
||||
51,
|
||||
52,
|
||||
53
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 7,
|
||||
"start_page": 913,
|
||||
"end_page": 920,
|
||||
"question_count": 4,
|
||||
"questions": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 8,
|
||||
"start_page": 921,
|
||||
"end_page": 921,
|
||||
"question_count": 1,
|
||||
"questions": [
|
||||
1
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 9,
|
||||
"start_page": 925,
|
||||
"end_page": 925,
|
||||
"question_count": 1,
|
||||
"questions": [
|
||||
1
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 10,
|
||||
"start_page": 929,
|
||||
"end_page": 941,
|
||||
"question_count": 6,
|
||||
"questions": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 11,
|
||||
"start_page": 942,
|
||||
"end_page": 942,
|
||||
"question_count": 1,
|
||||
"questions": [
|
||||
1
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 12,
|
||||
"start_page": 946,
|
||||
"end_page": 950,
|
||||
"question_count": 2,
|
||||
"questions": [
|
||||
1,
|
||||
2
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 13,
|
||||
"start_page": 952,
|
||||
"end_page": 966,
|
||||
"question_count": 7,
|
||||
"questions": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 14,
|
||||
"start_page": 968,
|
||||
"end_page": 975,
|
||||
"question_count": 3,
|
||||
"questions": [
|
||||
1,
|
||||
2,
|
||||
3
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 15,
|
||||
"start_page": 977,
|
||||
"end_page": 984,
|
||||
"question_count": 3,
|
||||
"questions": [
|
||||
1,
|
||||
2,
|
||||
3
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 16,
|
||||
"start_page": 986,
|
||||
"end_page": 996,
|
||||
"question_count": 4,
|
||||
"questions": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 17,
|
||||
"start_page": 999,
|
||||
"end_page": 999,
|
||||
"question_count": 1,
|
||||
"questions": [
|
||||
1
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 18,
|
||||
"start_page": 1002,
|
||||
"end_page": 1013,
|
||||
"question_count": 5,
|
||||
"questions": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 19,
|
||||
"start_page": 1015,
|
||||
"end_page": 1024,
|
||||
"question_count": 4,
|
||||
"questions": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 20,
|
||||
"start_page": 1026,
|
||||
"end_page": 1030,
|
||||
"question_count": 2,
|
||||
"questions": [
|
||||
1,
|
||||
2
|
||||
]
|
||||
},
|
||||
{
|
||||
"topic_num": 21,
|
||||
"start_page": 1032,
|
||||
"end_page": 1040,
|
||||
"question_count": 4,
|
||||
"questions": [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4
|
||||
]
|
||||
}
|
||||
]
|
||||
130
translate_aliyun.py
Normal file
130
translate_aliyun.py
Normal file
@@ -0,0 +1,130 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
使用阿里百炼API翻译题目内容
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import ssl
|
||||
|
||||
# SECURITY: the original hard-coded a live DashScope API key here; that key
# is now exposed in version control and should be rotated. Prefer the
# DASHSCOPE_API_KEY environment variable; the literal remains only as a
# backwards-compatible fallback.
API_KEY = os.environ.get("DASHSCOPE_API_KEY", "sk-74905419d30541d18991396892bb27b0")
# DashScope text-generation endpoint (qwen models).
API_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"
|
||||
|
||||
def translate_text(text):
    """Translate English *text* to Chinese via the Aliyun DashScope API.

    Empty/None input is returned unchanged; on any API or network failure
    the original text is returned so the caller can retry later.
    """
    if not text or not text.strip():
        return text

    prompt = f"""请将以下英文翻译成中文,保持专业术语的准确性,直接输出翻译结果,不要添加任何解释:

英文原文:
{text}

中文翻译:"""

    try:
        headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {API_KEY}'
        }

        data = {
            "model": "qwen-turbo",
            "input": {
                "messages": [
                    {
                        "role": "user",
                        "content": prompt
                    }
                ]
            },
            "parameters": {
                "temperature": 0.1,
                "max_tokens": 2000
            }
        }

        req = urllib.request.Request(
            API_URL,
            data=json.dumps(data).encode('utf-8'),
            headers=headers,
            method='POST'
        )

        ssl_context = ssl.create_default_context()

        with urllib.request.urlopen(req, context=ssl_context, timeout=60) as response:
            result = json.loads(response.read().decode('utf-8'))

        if result and 'output' in result and 'text' in result['output']:
            translated = result['output']['text'].strip()
            # BUG FIX: the prefix '中文翻译:' is 5 characters, but the old code
            # sliced [6:], silently dropping the first character of every
            # translation. Strip exactly the prefix length instead.
            if translated.startswith('中文翻译:'):
                translated = translated[len('中文翻译:'):].strip()
            return translated
        else:
            print(f"API响应格式错误: {result}")
            return text

    except Exception as e:
        # Best-effort: report the error and fall back to the source text.
        print(f"翻译错误: {e}")
        return text
|
||||
|
||||
def translate_questions(input_file, output_file, start_index=0, batch_size=10):
    """Translate one batch of questions, resuming from previous output.

    When *output_file* already exists, progress is loaded from it so a run
    can be resumed; otherwise *input_file* is the starting point. Questions
    in [start_index, start_index + batch_size) that are still untranslated
    are sent to the API, then the whole set is written back out.
    """
    source = output_file if os.path.exists(output_file) else input_file
    with open(source, 'r', encoding='utf-8') as fh:
        questions = json.load(fh)

    total = len(questions)
    end_index = min(start_index + batch_size, total)

    print(f"正在翻译第 {start_index + 1} 到 {end_index} 题,共 {total} 题")

    for idx in range(start_index, end_index):
        q = questions[idx]
        print(f"翻译 Topic {q['topic']} - Question {q['question_num']} ({idx+1}/{total})")

        # Missing, empty, or still carrying the placeholder → needs work.
        stem_cn = q.get('stem_cn')
        if not stem_cn or stem_cn == '待翻译...':
            print(f" 翻译题干...")
            q['stem_cn'] = translate_text(q['stem'])
            time.sleep(0.5)

        for opt in q['options']:
            text_cn = opt.get('text_cn')
            if not text_cn or text_cn == '待翻译...':
                print(f" 翻译选项 {opt['label']}...")
                opt['text_cn'] = translate_text(opt['text'])
                time.sleep(0.3)

    # Persist the full set (translated and pending alike) after each batch.
    with open(output_file, 'w', encoding='utf-8') as fh:
        json.dump(questions, fh, ensure_ascii=False, indent=2)

    print(f"\n翻译进度: {end_index}/{total}")

    if end_index < total:
        print(f"继续翻译请运行: python3 translate_aliyun.py --start {end_index}")
    else:
        print("所有题目翻译完成!")

    return questions
|
||||
|
||||
if __name__ == '__main__':
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('--start', type=int, default=0, help='起始索引')
    cli.add_argument('--batch', type=int, default=10, help='批量大小')
    opts = cli.parse_args()

    # Batched, resumable translation run.
    translate_questions(
        '/Users/duguoyou/D365/exam_data/questions.json',
        '/Users/duguoyou/D365/exam_data/questions_translated.json',
        opts.start,
        opts.batch,
    )
|
||||
116
translate_all.py
Normal file
116
translate_all.py
Normal file
@@ -0,0 +1,116 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
自动翻译所有题目
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import urllib.request
|
||||
import ssl
|
||||
|
||||
# SECURITY: the original hard-coded a live DashScope API key here; that key
# is now exposed in version control and should be rotated. Prefer the
# DASHSCOPE_API_KEY environment variable; the literal remains only as a
# backwards-compatible fallback.
API_KEY = os.environ.get("DASHSCOPE_API_KEY", "sk-74905419d30541d18991396892bb27b0")
# DashScope text-generation endpoint (qwen models).
API_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"
|
||||
|
||||
def translate_text(text):
    """Translate English *text* to Chinese via the Aliyun DashScope API.

    Empty/None input is returned unchanged; on any API or network failure
    the original text is returned so the caller can retry later.
    """
    if not text or not text.strip():
        return text

    prompt = f"""请将以下英文翻译成中文,保持专业术语的准确性,直接输出翻译结果,不要添加任何解释:

英文原文:
{text}

中文翻译:"""

    try:
        headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {API_KEY}'
        }

        data = {
            "model": "qwen-turbo",
            "input": {
                "messages": [
                    {
                        "role": "user",
                        "content": prompt
                    }
                ]
            },
            "parameters": {
                "temperature": 0.1,
                "max_tokens": 2000
            }
        }

        req = urllib.request.Request(
            API_URL,
            data=json.dumps(data).encode('utf-8'),
            headers=headers,
            method='POST'
        )

        ssl_context = ssl.create_default_context()

        with urllib.request.urlopen(req, context=ssl_context, timeout=60) as response:
            result = json.loads(response.read().decode('utf-8'))

        if result and 'output' in result and 'text' in result['output']:
            translated = result['output']['text'].strip()
            # BUG FIX: the prefix '中文翻译:' is 5 characters, but the old code
            # sliced [6:], silently dropping the first character of every
            # translation. Strip exactly the prefix length instead.
            if translated.startswith('中文翻译:'):
                translated = translated[len('中文翻译:'):].strip()
            return translated
        else:
            print(f"API响应格式错误: {result}")
            return text

    except Exception as e:
        # Best-effort: report the error and fall back to the source text.
        print(f"翻译错误: {e}")
        return text
|
||||
|
||||
def translate_all_questions(input_file, output_file):
    """Translate every question, checkpointing after each modified item.

    Resumes from *output_file* when it exists so interrupted runs pick up
    where they left off; only questions still marked untranslated are sent
    to the API.
    """
    source = output_file if os.path.exists(output_file) else input_file
    with open(source, 'r', encoding='utf-8') as fh:
        questions = json.load(fh)

    total = len(questions)

    def _pending(container, key):
        # Missing, empty, or still carrying the '待翻译...' placeholder.
        return key not in container or not container[key] or container[key] == '待翻译...'

    for idx, q in enumerate(questions):
        print(f"翻译 Topic {q['topic']} - Question {q['question_num']} ({idx+1}/{total})")

        dirty = False

        if _pending(q, 'stem_cn'):
            print(f" 翻译题干...")
            q['stem_cn'] = translate_text(q['stem'])
            dirty = True
            time.sleep(0.3)

        for opt in q['options']:
            if _pending(opt, 'text_cn'):
                print(f" 翻译选项 {opt['label']}...")
                opt['text_cn'] = translate_text(opt['text'])
                dirty = True
                time.sleep(0.2)

        # Checkpoint immediately so progress survives crashes/rate limits.
        if dirty:
            with open(output_file, 'w', encoding='utf-8') as fh:
                json.dump(questions, fh, ensure_ascii=False, indent=2)

    print(f"\n所有 {total} 道题目翻译完成!")
|
||||
|
||||
if __name__ == '__main__':
    # Entry point: translate every question, resuming from prior output.
    translate_all_questions(
        '/Users/duguoyou/D365/exam_data/questions.json',
        '/Users/duguoyou/D365/exam_data/questions_translated.json',
    )
|
||||
99
translate_api.py
Normal file
99
translate_api.py
Normal file
@@ -0,0 +1,99 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
翻译题目内容 - 使用翻译API
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import ssl
|
||||
|
||||
# Shared SSL context for all translation requests.
# SECURITY NOTE(review): certificate verification is disabled here
# (check_hostname=False, CERT_NONE), exposing every request to
# man-in-the-middle interception. Presumably a workaround for a local
# CA/proxy issue — confirm, and prefer a verifying context if possible.
ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
|
||||
|
||||
def translate_text(text, target_lang='zh-CN'):
    """Translate English *text* via the unofficial Google Translate endpoint.

    Blank/empty input is returned untouched; any failure falls back to
    returning the original text.
    """
    if not text or not text.strip():
        return text

    try:
        endpoint = "https://translate.googleapis.com/translate_a/single"
        query = urllib.parse.urlencode({
            'client': 'gtx',
            'sl': 'en',
            'tl': target_lang,
            'dt': 't',
            'q': text,
        })

        request = urllib.request.Request(endpoint + '?' + query)
        request.add_header('User-Agent', 'Mozilla/5.0')

        with urllib.request.urlopen(request, context=ssl_context, timeout=30) as resp:
            payload = json.loads(resp.read().decode('utf-8'))

        # Segment list 0 holds [translated, original, ...] tuples.
        if payload and payload[0]:
            return ''.join([seg[0] for seg in payload[0] if seg[0]])
    except Exception as exc:
        print(f"翻译错误: {exc}")
        return text

    return text
|
||||
|
||||
def translate_questions(input_file, output_file, start_index=0, batch_size=50):
    """Translate one batch of questions and write the full set back out.

    Only questions in [start_index, start_index + batch_size) whose
    Chinese fields are still missing or empty are translated; the complete
    list is then persisted to *output_file* so runs can be chained.
    """
    with open(input_file, 'r', encoding='utf-8') as fh:
        questions = json.load(fh)

    total = len(questions)
    end_index = min(start_index + batch_size, total)

    print(f"正在翻译第 {start_index + 1} 到 {end_index} 题,共 {total} 题")

    for idx in range(start_index, end_index):
        q = questions[idx]
        print(f"翻译 Topic {q['topic']} - Question {q['question_num']} ({idx+1}/{total})")

        if not q.get('stem_cn'):
            q['stem_cn'] = translate_text(q['stem'])
            time.sleep(0.5)

        for opt in q['options']:
            if not opt.get('text_cn'):
                opt['text_cn'] = translate_text(opt['text'])
                time.sleep(0.3)

    with open(output_file, 'w', encoding='utf-8') as fh:
        json.dump(questions, fh, ensure_ascii=False, indent=2)

    print(f"翻译进度: {end_index}/{total}")

    if end_index < total:
        print(f"继续翻译请运行: python3 translate_api.py --start {end_index}")

    return questions
|
||||
|
||||
if __name__ == '__main__':
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('--start', type=int, default=0, help='起始索引')
    cli.add_argument('--batch', type=int, default=50, help='批量大小')
    opts = cli.parse_args()

    input_file = '/Users/duguoyou/D365/exam_data/questions.json'
    output_file = '/Users/duguoyou/D365/exam_data/questions_translated.json'

    # Resume from previous output when it already exists.
    if os.path.exists(output_file):
        input_file = output_file

    translate_questions(input_file, output_file, opts.start, opts.batch)
|
||||
52
translate_questions.py
Normal file
52
translate_questions.py
Normal file
@@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
翻译题目内容
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
def translate_text(text):
    """Placeholder translation hook.

    A real implementation should call a translation API; for now the
    input is returned verbatim so the pipeline structure can be exercised.
    """
    return text
|
||||
|
||||
def translate_questions(input_file, output_file):
    """Build a bilingual (en/cn) question list and write it to *output_file*."""
    with open(input_file, 'r', encoding='utf-8') as fh:
        questions = json.load(fh)

    translated_questions = [
        {
            'topic': q['topic'],
            'question_num': q['question_num'],
            'stem_en': q['stem'],
            'stem_cn': translate_text(q['stem']),
            'options': [
                {
                    'label': opt['label'],
                    'text_en': opt['text'],
                    'text_cn': translate_text(opt['text']),
                }
                for opt in q['options']
            ],
            'answer': q['answer'],
        }
        for q in questions
    ]

    with open(output_file, 'w', encoding='utf-8') as fh:
        json.dump(translated_questions, fh, ensure_ascii=False, indent=2)

    print(f"翻译完成,保存到: {output_file}")
|
||||
|
||||
if __name__ == '__main__':
    # Entry point: produce the bilingual question file.
    translate_questions(
        '/Users/duguoyou/D365/exam_data/questions.json',
        '/Users/duguoyou/D365/exam_data/questions_translated.json',
    )
|
||||
28
view_pdf.py
Normal file
28
view_pdf.py
Normal file
@@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
分析PDF文件结构,查看前几页内容
|
||||
"""
|
||||
from PyPDF2 import PdfReader
|
||||
|
||||
def view_pdf_content(pdf_path, start_page=0, end_page=5):
    """Print the text of pages [start_page, end_page) for quick inspection.

    Each page's text is truncated to 2000 characters; pages with no
    extractable text are reported as such.
    """
    reader = PdfReader(pdf_path)
    total_pages = len(reader.pages)
    print(f"PDF总页数: {total_pages}")

    last = min(end_page, total_pages)
    for idx in range(start_page, last):
        extracted = reader.pages[idx].extract_text()
        print('\n' + '=' * 60)
        print(f"第 {idx + 1} 页:")
        print('=' * 60)
        print(extracted[:2000] if extracted else "(无文本内容)")
|
||||
|
||||
if __name__ == '__main__':
    # Preview the first three pages of the source exam PDF.
    view_pdf_content('/Users/duguoyou/D365/MB-330_with_discussion.pdf', 0, 3)
|
||||
Reference in New Issue
Block a user