first commit

This commit is contained in:
2026-03-21 09:12:47 +08:00
commit a1e76157c9
80 changed files with 506309 additions and 0 deletions

470844
MB-330_with_discussion.pdf Normal file

File diff suppressed because one or more lines are too long

73
analyze_pdf.py Normal file
View File

@@ -0,0 +1,73 @@
#!/usr/bin/env python3
"""
分析PDF文件结构识别Topic位置
"""
import re
from PyPDF2 import PdfReader
def analyze_pdf_structure(pdf_path):
    """Locate every ``Topic N`` section of a PDF and count its questions.

    The text of each page is scanned line by line. A line that starts with
    ``Topic <n>`` opens topic *n*. Lines that start with ``Question <k>`` or
    ``Question #<k>`` are tracked for the topic that is currently open, and
    the largest *k* seen is reported as that topic's question count.

    Args:
        pdf_path: Path of the PDF file to analyse.

    Returns:
        A ``(topics, total_pages)`` tuple. ``topics`` maps each topic number
        to a dict with the keys ``title`` (the header line), ``start_page``
        and ``end_page`` (0-based page indices) and ``question_count``.
    """
    reader = PdfReader(pdf_path)
    total_pages = len(reader.pages)
    print(f"PDF总页数: {total_pages}")

    topic_pattern = re.compile(r'^Topic\s+(\d+)', re.IGNORECASE)
    # The "#" is optional so that both "Question 12" and "Question #12" are
    # recognised; the other analysis scripts for this PDF match the "#" form.
    question_pattern = re.compile(r'^Question\s+#?(\d+)', re.IGNORECASE)

    topics = {}
    current_topic = None

    for page_num in range(total_pages):
        text = reader.pages[page_num].extract_text()
        if not text:
            continue

        for raw_line in text.split('\n'):
            line = raw_line.strip()

            topic_match = topic_pattern.match(line)
            if topic_match:
                topic_num = int(topic_match.group(1))
                # A header repeated for the topic that is already open must
                # not reset its start page or its question counter.
                if topic_num != current_topic:
                    if current_topic is not None:
                        # The previous topic ends on the page where the next
                        # topic's header was found.
                        topics[current_topic]['end_page'] = page_num
                    current_topic = topic_num
                    topics[topic_num] = {
                        'title': line,
                        'start_page': page_num,
                        'end_page': None,
                        'question_count': 0,
                    }
                    print(f"发现 Topic {topic_num}: 第 {page_num + 1} 页 - {line}")

            question_match = question_pattern.match(line)
            if question_match and current_topic is not None:
                entry = topics[current_topic]
                q_num = int(question_match.group(1))
                if q_num > entry['question_count']:
                    entry['question_count'] = q_num

    # The last topic runs to the end of the document.
    if current_topic is not None:
        topics[current_topic]['end_page'] = total_pages - 1

    # Summing the final entries keeps the total consistent with the per-topic
    # figures printed below.
    total_questions = sum(info['question_count'] for info in topics.values())

    print(f"\n共发现 {len(topics)} 个Topic")
    print(f"总题目数: {total_questions}")
    print("\n各Topic统计:")
    for topic_num in sorted(topics):
        info = topics[topic_num]
        print(f" Topic {topic_num}: 第 {info['start_page']+1}-{info['end_page']+1} 页, {info['question_count']} 道题")
    return topics, total_pages
if __name__ == '__main__':
    import sys

    # Analyse the PDF named on the command line; without an argument, fall
    # back to the original hard-coded location.
    pdf_path = sys.argv[1] if len(sys.argv) > 1 else '/Users/duguoyou/D365/MB-330_with_discussion.pdf'
    topics, total_pages = analyze_pdf_structure(pdf_path)

91
analyze_pdf_v2.py Normal file
View File

@@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""
分析PDF文件结构识别Topic位置和题目数量
"""
import re
import json
from PyPDF2 import PdfReader
def analyze_pdf_structure(pdf_path, output_path='/Users/duguoyou/D365/topics_info.json'):
    """Find each topic's page range and question numbers and save them as JSON.

    The text of each page is scanned line by line. A line that consists only
    of ``Topic <n>`` opens topic *n* and closes the topic that was open before
    it. Every ``Question #<k>`` found anywhere in a line is collected for the
    topic that is currently open.

    Args:
        pdf_path: Path of the PDF file to analyse.
        output_path: Where the JSON summary is written. Defaults to the
            location the script originally hard-coded.

    Returns:
        A list with one dict per topic, ordered by topic number, with the keys
        ``topic_num``, ``start_page``, ``end_page`` (0-based page indices),
        ``question_count`` and ``questions`` (sorted question numbers).
    """
    import os  # local import: only needed for the status message at the end

    reader = PdfReader(pdf_path)
    total_pages = len(reader.pages)
    print(f"PDF总页数: {total_pages}")

    topic_pattern = re.compile(r'^Topic\s+(\d+)$', re.IGNORECASE)
    question_pattern = re.compile(r'Question\s+#(\d+)', re.IGNORECASE)

    topics = {}
    current_topic = None
    current_topic_start = None
    # A set gives the same de-duplication as the original list scan; the
    # numbers are sorted when the result is built.
    all_questions = set()

    for page_num in range(total_pages):
        text = reader.pages[page_num].extract_text()
        if not text:
            continue

        for raw_line in text.split('\n'):
            line = raw_line.strip()

            topic_match = topic_pattern.match(line)
            if topic_match:
                topic_num = int(topic_match.group(1))
                # A header repeated for the topic that is already open must
                # not close it and throw away the questions collected so far.
                if topic_num != current_topic:
                    if current_topic is not None:
                        topics[current_topic] = {
                            'topic_num': current_topic,
                            'start_page': current_topic_start,
                            # Never report an end before the start, e.g. when
                            # two topic headers share a page.
                            'end_page': max(current_topic_start, page_num - 1),
                            'questions': all_questions,
                        }
                        all_questions = set()
                    current_topic = topic_num
                    current_topic_start = page_num
                    print(f"发现 Topic {current_topic}: 第 {page_num + 1}")

            # Questions seen before the first topic header stay in the pending
            # set and are credited to the first topic when it is closed.
            question_match = question_pattern.search(line)
            if question_match:
                all_questions.add(int(question_match.group(1)))

    # The last topic runs to the end of the document.
    if current_topic is not None:
        topics[current_topic] = {
            'topic_num': current_topic,
            'start_page': current_topic_start,
            'end_page': total_pages - 1,
            'questions': all_questions,
        }

    print(f"\n共发现 {len(topics)} 个Topic")
    result = []
    total_questions = 0
    print("\n各Topic统计:")
    for topic_num in sorted(topics):
        info = topics[topic_num]
        q_count = len(info['questions'])
        total_questions += q_count
        print(f" Topic {topic_num}: 第 {info['start_page']+1}-{info['end_page']+1} 页, {q_count} 道题")
        result.append({
            'topic_num': topic_num,
            'start_page': info['start_page'],
            'end_page': info['end_page'],
            'question_count': q_count,
            'questions': sorted(info['questions']),
        })
    print(f"\n总题目数: {total_questions}")

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    # With the default path this prints the same "topics_info.json" as before.
    print(f"\nTopic信息已保存到 {os.path.basename(output_path)}")
    return result
if __name__ == '__main__':
    import sys

    # Analyse the PDF named on the command line; without an argument, fall
    # back to the original hard-coded location.
    pdf_path = sys.argv[1] if len(sys.argv) > 1 else '/Users/duguoyou/D365/MB-330_with_discussion.pdf'
    topics = analyze_pdf_structure(pdf_path)

82
analyze_pdf_v3.py Normal file
View File

@@ -0,0 +1,82 @@
#!/usr/bin/env python3
"""
分析PDF文件结构识别Topic位置和题目数量 - 改进版
"""
import re
import json
from PyPDF2 import PdfReader
def analyze_pdf_structure(pdf_path, output_path='/Users/duguoyou/D365/topics_info.json'):
    """Find each topic's page range and question numbers and save them as JSON.

    A line that consists only of ``Topic <n>`` marks a page as belonging to
    topic *n*: the first such page becomes the topic's start and the most
    recent one its end. Every ``Question #<k>`` found in a line is credited to
    the highest-numbered topic seen so far.

    Args:
        pdf_path: Path of the PDF file to analyse.
        output_path: Where the JSON summary is written. Defaults to the
            location the script originally hard-coded.

    Returns:
        A list with one dict per topic, ordered by topic number, with the keys
        ``topic_num``, ``start_page``, ``end_page`` (0-based page indices),
        ``question_count`` and ``questions`` (sorted question numbers).
    """
    import os  # local import: only needed for the status message at the end

    reader = PdfReader(pdf_path)
    total_pages = len(reader.pages)
    print(f"PDF总页数: {total_pages}")

    topic_header_pattern = re.compile(r'^Topic\s+(\d+)$', re.IGNORECASE)
    question_pattern = re.compile(r'Question\s+#(\d+)', re.IGNORECASE)

    topic_pages = {}
    # topic number -> set of question numbers (sorted when reported)
    all_questions_in_topic = {}

    for page_num in range(total_pages):
        text = reader.pages[page_num].extract_text()
        if not text:
            continue

        for raw_line in text.split('\n'):
            line = raw_line.strip()

            topic_match = topic_header_pattern.match(line)
            if topic_match:
                topic_num = int(topic_match.group(1))
                if topic_num in topic_pages:
                    topic_pages[topic_num]['last_page'] = page_num
                else:
                    topic_pages[topic_num] = {'first_page': page_num, 'last_page': page_num}
                    all_questions_in_topic[topic_num] = set()

            question_match = question_pattern.search(line)
            if question_match and topic_pages:
                # Pages are scanned in order, so every topic registered so far
                # started on this page or an earlier one. The original
                # descending search therefore always stopped at the
                # highest-numbered topic seen so far.
                owner = max(topic_pages)
                all_questions_in_topic[owner].add(int(question_match.group(1)))

    print(f"\n共发现 {len(topic_pages)} 个Topic")
    result = []
    total_questions = 0
    print("\n各Topic统计:")
    for topic_num in sorted(topic_pages):
        questions = sorted(all_questions_in_topic.get(topic_num, ()))
        q_count = len(questions)
        total_questions += q_count
        first_page = topic_pages[topic_num]['first_page']
        last_page = topic_pages[topic_num]['last_page']
        print(f" Topic {topic_num}: 第 {first_page+1}-{last_page+1} 页, {q_count} 道题 (题目编号: {questions[:5]}{'...' if len(questions) > 5 else ''})")
        result.append({
            'topic_num': topic_num,
            'start_page': first_page,
            'end_page': last_page,
            'question_count': q_count,
            'questions': questions,
        })
    print(f"\n总题目数: {total_questions}")

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    # With the default path this prints the same "topics_info.json" as before.
    print(f"\nTopic信息已保存到 {os.path.basename(output_path)}")
    return result
if __name__ == '__main__':
    import sys

    # Analyse the PDF named on the command line; without an argument, fall
    # back to the original hard-coded location.
    pdf_path = sys.argv[1] if len(sys.argv) > 1 else '/Users/duguoyou/D365/MB-330_with_discussion.pdf'
    topics = analyze_pdf_structure(pdf_path)

24
exam-viewer/.gitignore vendored Normal file
View File

@@ -0,0 +1,24 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

3
exam-viewer/.vscode/extensions.json vendored Normal file
View File

@@ -0,0 +1,3 @@
{
"recommendations": ["Vue.volar"]
}

5
exam-viewer/README.md Normal file
View File

@@ -0,0 +1,5 @@
# Vue 3 + TypeScript + Vite
This template should help get you started developing with Vue 3 and TypeScript in Vite. The template uses Vue 3 `<script setup>` SFCs, check out the [script setup docs](https://v3.vuejs.org/api/sfc-script-setup.html#sfc-script-setup) to learn more.
Learn more about the recommended Project Setup and IDE Support in the [Vue Docs TypeScript Guide](https://vuejs.org/guide/typescript/overview.html#project-setup).

13
exam-viewer/index.html Normal file
View File

@@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/favicon.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>exam-viewer</title>
</head>
<body>
<div id="app"></div>
<script type="module" src="/src/main.ts"></script>
</body>
</html>

1570
exam-viewer/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

25
exam-viewer/package.json Normal file
View File

@@ -0,0 +1,25 @@
{
"name": "exam-viewer",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "vue-tsc -b && vite build",
"preview": "vite preview"
},
"dependencies": {
"@element-plus/icons-vue": "^2.3.2",
"element-plus": "^2.13.6",
"pinia": "^3.0.4",
"vue": "^3.5.30"
},
"devDependencies": {
"@types/node": "^24.12.0",
"@vitejs/plugin-vue": "^6.0.5",
"@vue/tsconfig": "^0.9.0",
"typescript": "~5.9.3",
"vite": "^8.0.1",
"vue-tsc": "^3.2.5"
}
}

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 9.3 KiB

View File

@@ -0,0 +1,24 @@
<svg xmlns="http://www.w3.org/2000/svg">
<symbol id="bluesky-icon" viewBox="0 0 16 17">
<g clip-path="url(#bluesky-clip)"><path fill="#08060d" d="M7.75 7.735c-.693-1.348-2.58-3.86-4.334-5.097-1.68-1.187-2.32-.981-2.74-.79C.188 2.065.1 2.812.1 3.251s.241 3.602.398 4.13c.52 1.744 2.367 2.333 4.07 2.145-2.495.37-4.71 1.278-1.805 4.512 3.196 3.309 4.38-.71 4.987-2.746.608 2.036 1.307 5.91 4.93 2.746 2.72-2.746.747-4.143-1.747-4.512 1.702.189 3.55-.4 4.07-2.145.156-.528.397-3.691.397-4.13s-.088-1.186-.575-1.406c-.42-.19-1.06-.395-2.741.79-1.755 1.24-3.64 3.752-4.334 5.099"/></g>
<defs><clipPath id="bluesky-clip"><path fill="#fff" d="M.1.85h15.3v15.3H.1z"/></clipPath></defs>
</symbol>
<symbol id="discord-icon" viewBox="0 0 20 19">
<path fill="#08060d" d="M16.224 3.768a14.5 14.5 0 0 0-3.67-1.153c-.158.286-.343.67-.47.976a13.5 13.5 0 0 0-4.067 0c-.128-.306-.317-.69-.476-.976A14.4 14.4 0 0 0 3.868 3.77C1.546 7.28.916 10.703 1.231 14.077a14.7 14.7 0 0 0 4.5 2.306q.545-.748.965-1.587a9.5 9.5 0 0 1-1.518-.74q.191-.14.372-.293c2.927 1.369 6.107 1.369 8.999 0q.183.152.372.294-.723.437-1.52.74.418.838.963 1.588a14.6 14.6 0 0 0 4.504-2.308c.37-3.911-.63-7.302-2.644-10.309m-9.13 8.234c-.878 0-1.599-.82-1.599-1.82 0-.998.705-1.82 1.6-1.82.894 0 1.614.82 1.599 1.82.001 1-.705 1.82-1.6 1.82m5.91 0c-.878 0-1.599-.82-1.599-1.82 0-.998.705-1.82 1.6-1.82.893 0 1.614.82 1.599 1.82 0 1-.706 1.82-1.6 1.82"/>
</symbol>
<symbol id="documentation-icon" viewBox="0 0 21 20">
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="m15.5 13.333 1.533 1.322c.645.555.967.833.967 1.178s-.322.623-.967 1.179L15.5 18.333m-3.333-5-1.534 1.322c-.644.555-.966.833-.966 1.178s.322.623.966 1.179l1.534 1.321"/>
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M17.167 10.836v-4.32c0-1.41 0-2.117-.224-2.68-.359-.906-1.118-1.621-2.08-1.96-.599-.21-1.349-.21-2.848-.21-2.623 0-3.935 0-4.983.369-1.684.591-3.013 1.842-3.641 3.428C3 6.449 3 7.684 3 10.154v2.122c0 2.558 0 3.838.706 4.726q.306.383.713.671c.76.536 1.79.64 3.581.66"/>
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M3 10a2.78 2.78 0 0 1 2.778-2.778c.555 0 1.209.097 1.748-.047.48-.129.854-.503.982-.982.145-.54.048-1.194.048-1.749a2.78 2.78 0 0 1 2.777-2.777"/>
</symbol>
<symbol id="github-icon" viewBox="0 0 19 19">
<path fill="#08060d" fill-rule="evenodd" d="M9.356 1.85C5.05 1.85 1.57 5.356 1.57 9.694a7.84 7.84 0 0 0 5.324 7.44c.387.079.528-.168.528-.376 0-.182-.013-.805-.013-1.454-2.165.467-2.616-.935-2.616-.935-.349-.91-.864-1.143-.864-1.143-.71-.48.051-.48.051-.48.787.051 1.2.805 1.2.805.695 1.194 1.817.857 2.268.649.064-.507.27-.857.49-1.052-1.728-.182-3.545-.857-3.545-3.87 0-.857.31-1.558.8-2.104-.078-.195-.349-1 .077-2.078 0 0 .657-.208 2.14.805a7.5 7.5 0 0 1 1.946-.26c.657 0 1.328.092 1.946.26 1.483-1.013 2.14-.805 2.14-.805.426 1.078.155 1.883.078 2.078.502.546.799 1.247.799 2.104 0 3.013-1.818 3.675-3.558 3.87.284.247.528.714.528 1.454 0 1.052-.012 1.896-.012 2.156 0 .208.142.455.528.377a7.84 7.84 0 0 0 5.324-7.441c.013-4.338-3.48-7.844-7.773-7.844" clip-rule="evenodd"/>
</symbol>
<symbol id="social-icon" viewBox="0 0 20 20">
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M12.5 6.667a4.167 4.167 0 1 0-8.334 0 4.167 4.167 0 0 0 8.334 0"/>
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M2.5 16.667a5.833 5.833 0 0 1 8.75-5.053m3.837.474.513 1.035c.07.144.257.282.414.309l.93.155c.596.1.736.536.307.965l-.723.73a.64.64 0 0 0-.152.531l.207.903c.164.715-.213.991-.84.618l-.872-.52a.63.63 0 0 0-.577 0l-.872.52c-.624.373-1.003.094-.84-.618l.207-.903a.64.64 0 0 0-.152-.532l-.723-.729c-.426-.43-.289-.864.306-.964l.93-.156a.64.64 0 0 0 .412-.31l.513-1.034c.28-.562.735-.562 1.012 0"/>
</symbol>
<symbol id="x-icon" viewBox="0 0 19 19">
<path fill="#08060d" fill-rule="evenodd" d="M1.893 1.98c.052.072 1.245 1.769 2.653 3.77l2.892 4.114c.183.261.333.48.333.486s-.068.089-.152.183l-.522.593-.765.867-3.597 4.087c-.375.426-.734.834-.798.905a1 1 0 0 0-.118.148c0 .01.236.017.664.017h.663l.729-.83c.4-.457.796-.906.879-.999a692 692 0 0 0 1.794-2.038c.034-.037.301-.34.594-.675l.551-.624.345-.392a7 7 0 0 1 .34-.374c.006 0 .93 1.306 2.052 2.903l2.084 2.965.045.063h2.275c1.87 0 2.273-.003 2.266-.021-.008-.02-1.098-1.572-3.894-5.547-2.013-2.862-2.28-3.246-2.273-3.266.008-.019.282-.332 2.085-2.38l2-2.274 1.567-1.782c.022-.028-.016-.03-.65-.03h-.674l-.3.342a871 871 0 0 1-1.782 2.025c-.067.075-.405.458-.75.852a100 100 0 0 1-.803.91c-.148.172-.299.344-.99 1.127-.304.343-.32.358-.345.327-.015-.019-.904-1.282-1.976-2.808L6.365 1.85H1.8zm1.782.91 8.078 11.294c.772 1.08 1.413 1.973 1.425 1.984.016.017.241.02 1.05.017l1.03-.004-2.694-3.766L7.796 5.75 5.722 2.852l-1.039-.004-1.039-.004z" clip-rule="evenodd"/>
</symbol>
</svg>

After

Width:  |  Height:  |  Size: 4.9 KiB

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

522
exam-viewer/src/App.vue Normal file
View File

@@ -0,0 +1,522 @@
<template>
<div class="app-container">
<el-container>
<el-aside width="220px" class="sidebar">
<div class="logo">
<h2>MB-330 考试学习</h2>
</div>
<el-menu
:default-active="String(currentTopic)"
@select="handleTopicSelect"
class="topic-menu"
>
<el-menu-item
v-for="topic in topics"
:key="topic"
:index="String(topic)"
>
<span>Topic {{ topic }}</span>
<el-badge :value="topicStats[topic] || 0" class="topic-badge" />
</el-menu-item>
</el-menu>
</el-aside>
<el-main class="main-content">
<div v-if="loading" class="loading-container">
<el-icon class="is-loading" :size="40"><Loading /></el-icon>
<p>加载中...</p>
</div>
<template v-else-if="currentQuestion">
<div class="question-header">
<h3>Topic {{ currentTopic }} - Question {{ currentQuestion.question_num }}</h3>
<div class="header-right">
<el-button
type="warning"
size="small"
@click="handleOpenPdf"
>
<el-icon><Document /></el-icon>
查看原PDF
</el-button>
<div class="jump-control">
<span>跳转到第</span>
<el-input-number
v-model="jumpQuestionNum"
:min="1"
:max="currentTopicQuestions.length"
size="small"
controls-position="right"
/>
<span></span>
<el-button type="primary" size="small" @click="handleJumpQuestion">
跳转
</el-button>
</div>
<span class="question-progress">
{{ currentQuestionIndex + 1 }} / {{ currentTopicQuestions.length }}
</span>
</div>
</div>
<div class="question-content">
<div class="bilingual-container">
<div class="language-panel english-panel">
<div class="panel-header">
<el-tag type="primary">English</el-tag>
</div>
<div class="stem-text">
{{ currentQuestion.stem }}
</div>
<div class="options-list">
<div
v-for="option in currentQuestion.options"
:key="option.label"
class="option-item"
:class="{
'correct-option': showAnswer && currentQuestion.answer.includes(option.label)
}"
>
<span class="option-label">{{ option.label }}.</span>
<span class="option-text">{{ option.text }}</span>
</div>
</div>
</div>
<div class="language-panel chinese-panel">
<div class="panel-header">
<el-tag type="success">中文</el-tag>
</div>
<div class="stem-text">
{{ currentQuestion.stem_cn || '待翻译...' }}
</div>
<div class="options-list">
<div
v-for="option in currentQuestion.options"
:key="option.label"
class="option-item"
:class="{
'correct-option': showAnswer && currentQuestion.answer.includes(option.label)
}"
>
<span class="option-label">{{ option.label }}.</span>
<span class="option-text">{{ option.text_cn || '待翻译...' }}</span>
</div>
</div>
</div>
</div>
<div v-if="showAnswer && currentQuestion.answer" class="answer-section">
<el-alert
:title="`正确答案: ${currentQuestion.answer}`"
type="success"
:closable="false"
show-icon
/>
</div>
</div>
<div class="question-actions">
<el-button
@click="handlePrevQuestion"
:disabled="currentQuestionIndex === 0"
>
<el-icon><ArrowLeft /></el-icon>
上一题
</el-button>
<el-button
type="primary"
@click="handleToggleAnswer"
>
{{ showAnswer ? '隐藏答案' : '显示答案' }}
</el-button>
<el-button
@click="handleNextQuestion"
:disabled="currentQuestionIndex === currentTopicQuestions.length - 1"
>
下一题
<el-icon><ArrowRight /></el-icon>
</el-button>
</div>
</template>
</el-main>
</el-container>
<Teleport to="body">
<div v-if="pdfDialogVisible" class="pdf-overlay" :class="{ 'is-maximized': isMaximized }">
<div class="pdf-modal">
<div class="pdf-modal-header">
<span class="pdf-modal-title">Topic {{ currentTopic }} - 原文PDF</span>
<div class="pdf-modal-actions">
<el-button
type="primary"
size="small"
circle
@click="toggleMaximize"
:title="isMaximized ? '还原' : '最大化'"
>
<el-icon v-if="isMaximized"><Minus /></el-icon>
<el-icon v-else><FullScreen /></el-icon>
</el-button>
<el-button
type="danger"
size="small"
circle
@click="closePdfDialog"
title="关闭"
>
<el-icon><Close /></el-icon>
</el-button>
</div>
</div>
<div class="pdf-modal-body">
<iframe
:src="currentPdfUrl"
class="pdf-iframe"
frameborder="0"
></iframe>
</div>
</div>
</div>
</Teleport>
</div>
</template>
<script setup lang="ts">
import { onMounted, ref, watch, computed } from 'vue'
import { useQuestionStore } from './stores/questions'
import { storeToRefs } from 'pinia'
// Question store; its state and getters are exposed to the template as refs.
const questionStore = useQuestionStore()
const {
  loading,
  topics,
  topicStats,
  currentTopic,
  currentTopicQuestions,
  currentQuestion,
  currentQuestionIndex,
  showAnswer,
} = storeToRefs(questionStore)

// 1-based question number bound to the "jump to" input.
const jumpQuestionNum = ref(1)
// Visibility and maximised state of the PDF overlay.
const pdfDialogVisible = ref(false)
const isMaximized = ref(false)

// URL of the selected topic's PDF; the topic number is zero-padded to two digits.
const currentPdfUrl = computed(
  () => `/pdfs/topic_${String(currentTopic.value).padStart(2, '0')}.pdf`,
)

// Keep the 1-based jump input in step with the store's 0-based index.
watch(currentQuestionIndex, (index) => {
  jumpQuestionNum.value = index + 1
})

onMounted(() => {
  questionStore.loadQuestions()
})

// Menu selection: the menu index is the topic number as a string.
const handleTopicSelect = (index: string) => {
  questionStore.setTopic(Number(index))
  jumpQuestionNum.value = 1
}

const handlePrevQuestion = () => {
  questionStore.prevQuestion()
}

const handleNextQuestion = () => {
  questionStore.nextQuestion()
}

const handleToggleAnswer = () => {
  questionStore.toggleAnswer()
}

// Jump to the question typed into the input; out-of-range values are ignored.
const handleJumpQuestion = () => {
  const target = jumpQuestionNum.value - 1
  if (target < 0 || target >= currentTopicQuestions.value.length) {
    return
  }
  questionStore.jumpToQuestion(target)
}

const handleOpenPdf = () => {
  pdfDialogVisible.value = true
}

const toggleMaximize = () => {
  isMaximized.value = !isMaximized.value
}

// Closing the overlay also restores the non-maximised layout.
const closePdfDialog = () => {
  pdfDialogVisible.value = false
  isMaximized.value = false
}
</script>
<style scoped>
.app-container {
height: 100vh;
background-color: #f5f7fa;
}
.el-container {
height: 100%;
}
.sidebar {
background-color: #fff;
border-right: 1px solid #e4e7ed;
overflow-y: auto;
}
.logo {
padding: 20px;
text-align: center;
border-bottom: 1px solid #e4e7ed;
}
.logo h2 {
margin: 0;
color: #409eff;
font-size: 18px;
}
.topic-menu {
border-right: none;
}
.topic-badge {
margin-left: auto;
}
.main-content {
padding: 20px;
overflow-y: auto;
}
.loading-container {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
height: 100%;
}
.question-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 20px;
padding-bottom: 15px;
border-bottom: 1px solid #e4e7ed;
flex-wrap: wrap;
gap: 10px;
}
.question-header h3 {
margin: 0;
color: #303133;
}
.header-right {
display: flex;
align-items: center;
gap: 20px;
}
.jump-control {
display: flex;
align-items: center;
gap: 8px;
font-size: 14px;
color: #606266;
}
.jump-control .el-input-number {
width: 80px;
}
.question-progress {
color: #909399;
font-size: 14px;
}
.question-content {
background-color: #fff;
border-radius: 8px;
padding: 24px;
box-shadow: 0 2px 12px rgba(0, 0, 0, 0.05);
}
.bilingual-container {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 24px;
}
.language-panel {
padding: 16px;
border-radius: 6px;
border: 1px solid #e4e7ed;
}
.english-panel {
background-color: #fafafa;
}
.chinese-panel {
background-color: #f0f9eb;
}
.panel-header {
margin-bottom: 16px;
}
.stem-text {
font-size: 15px;
line-height: 1.8;
color: #303133;
margin-bottom: 20px;
padding: 12px;
background-color: #fff;
border-radius: 4px;
}
.options-list {
display: flex;
flex-direction: column;
gap: 12px;
}
.option-item {
display: flex;
padding: 12px 16px;
background-color: #fff;
border-radius: 4px;
border: 1px solid #dcdfe6;
transition: all 0.3s;
}
.option-item:hover {
border-color: #409eff;
background-color: #ecf5ff;
}
.option-item.correct-option {
border-color: #67c23a;
background-color: #f0f9eb;
}
.option-label {
font-weight: bold;
color: #409eff;
margin-right: 8px;
min-width: 24px;
}
.option-text {
color: #303133;
line-height: 1.6;
}
.answer-section {
margin-top: 20px;
padding-top: 20px;
border-top: 1px solid #e4e7ed;
}
.question-actions {
display: flex;
justify-content: center;
gap: 16px;
margin-top: 24px;
}
.pdf-overlay {
position: fixed;
top: 0;
left: 0;
right: 0;
bottom: 0;
background-color: rgba(0, 0, 0, 0.5);
display: flex;
justify-content: center;
align-items: center;
z-index: 2000;
}
.pdf-modal {
background-color: #fff;
border-radius: 8px;
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.15);
display: flex;
flex-direction: column;
width: 90%;
height: 90vh;
transition: all 0.3s ease;
}
.pdf-overlay.is-maximized .pdf-modal {
width: 100%;
height: 100vh;
border-radius: 0;
}
.pdf-modal-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 12px 16px;
background-color: #f5f7fa;
border-bottom: 1px solid #e4e7ed;
border-radius: 8px 8px 0 0;
flex-shrink: 0;
}
.pdf-overlay.is-maximized .pdf-modal-header {
border-radius: 0;
}
.pdf-modal-title {
font-size: 16px;
font-weight: 600;
color: #303133;
}
.pdf-modal-actions {
display: flex;
gap: 8px;
}
.pdf-modal-body {
flex: 1;
overflow: hidden;
}
.pdf-iframe {
width: 100%;
height: 100%;
border: none;
}
@media (max-width: 1200px) {
.bilingual-container {
grid-template-columns: 1fr;
}
.question-header {
flex-direction: column;
align-items: flex-start;
}
.header-right {
width: 100%;
justify-content: space-between;
flex-wrap: wrap;
}
}
</style>

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 8.5 KiB

View File

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="37.07" height="36" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 198"><path fill="#41B883" d="M204.8 0H256L128 220.8L0 0h97.92L128 51.2L157.44 0h47.36Z"></path><path fill="#41B883" d="m0 0l128 220.8L256 0h-51.2L128 132.48L50.56 0H0Z"></path><path fill="#35495E" d="M50.56 0L128 133.12L204.8 0h-47.36L128 51.2L97.92 0H50.56Z"></path></svg>

After

Width:  |  Height:  |  Size: 496 B

View File

@@ -0,0 +1,93 @@
<script setup lang="ts">
import { ref } from 'vue'
import viteLogo from '../assets/vite.svg'
import heroImg from '../assets/hero.png'
import vueLogo from '../assets/vue.svg'
const count = ref(0)
</script>
<template>
<section id="center">
<div class="hero">
<img :src="heroImg" class="base" width="170" height="179" alt="" />
<img :src="vueLogo" class="framework" alt="Vue logo" />
<img :src="viteLogo" class="vite" alt="Vite logo" />
</div>
<div>
<h1>Get started</h1>
<p>Edit <code>src/App.vue</code> and save to test <code>HMR</code></p>
</div>
<button class="counter" @click="count++">Count is {{ count }}</button>
</section>
<div class="ticks"></div>
<section id="next-steps">
<div id="docs">
<svg class="icon" role="presentation" aria-hidden="true">
<use href="/icons.svg#documentation-icon"></use>
</svg>
<h2>Documentation</h2>
<p>Your questions, answered</p>
<ul>
<li>
<a href="https://vite.dev/" target="_blank">
<img class="logo" :src="viteLogo" alt="" />
Explore Vite
</a>
</li>
<li>
<a href="https://vuejs.org/" target="_blank">
<img class="button-icon" :src="vueLogo" alt="" />
Learn more
</a>
</li>
</ul>
</div>
<div id="social">
<svg class="icon" role="presentation" aria-hidden="true">
<use href="/icons.svg#social-icon"></use>
</svg>
<h2>Connect with us</h2>
<p>Join the Vite community</p>
<ul>
<li>
<a href="https://github.com/vitejs/vite" target="_blank">
<svg class="button-icon" role="presentation" aria-hidden="true">
<use href="/icons.svg#github-icon"></use>
</svg>
GitHub
</a>
</li>
<li>
<a href="https://chat.vite.dev/" target="_blank">
<svg class="button-icon" role="presentation" aria-hidden="true">
<use href="/icons.svg#discord-icon"></use>
</svg>
Discord
</a>
</li>
<li>
<a href="https://x.com/vite_js" target="_blank">
<svg class="button-icon" role="presentation" aria-hidden="true">
<use href="/icons.svg#x-icon"></use>
</svg>
X.com
</a>
</li>
<li>
<a href="https://bsky.app/profile/vite.dev" target="_blank">
<svg class="button-icon" role="presentation" aria-hidden="true">
<use href="/icons.svg#bluesky-icon"></use>
</svg>
Bluesky
</a>
</li>
</ul>
</div>
</section>
<div class="ticks"></div>
<section id="spacer"></section>
</template>

18
exam-viewer/src/main.ts Normal file
View File

@@ -0,0 +1,18 @@
import { createApp } from 'vue'
import { createPinia } from 'pinia'
import ElementPlus from 'element-plus'
import 'element-plus/dist/index.css'
import * as ElementPlusIconsVue from '@element-plus/icons-vue'
import App from './App.vue'
import './style.css'
// Root application instance.
const app = createApp(App)

// Register every Element Plus icon globally under its export name so that
// templates can use the icons without importing them.
Object.entries(ElementPlusIconsVue).forEach(([name, icon]) => {
  app.component(name, icon)
})

// Install Pinia and Element Plus, then mount onto the #app element.
app.use(createPinia()).use(ElementPlus).mount('#app')

View File

@@ -0,0 +1,111 @@
import { defineStore } from 'pinia'
import { ref, computed } from 'vue'
/** One answer choice of a question. */
export interface QuestionOption {
// Identifier of the choice; App.vue renders it followed by "." and highlights
// the option when the question's `answer` string contains it.
label: string
// Option text shown in the English panel.
text: string
// Option text shown in the Chinese panel; App.vue shows a "待翻译..."
// placeholder when it is missing.
text_cn?: string
}
/** One exam question as loaded from the questions JSON file. */
export interface Question {
// Topic number; the store groups and filters questions by this value.
topic: number
// Question number shown in the page header.
question_num: number
// Question text shown in the English panel.
stem: string
// Question text shown in the Chinese panel; App.vue shows a "待翻译..."
// placeholder when it is missing.
stem_cn?: string
// Answer choices, rendered in array order.
options: QuestionOption[]
// Correct answer; an option is highlighted when this string contains its label.
answer: string
}
/**
 * Pinia store that holds all exam questions and the current reading position
 * (selected topic, index within that topic, and whether the answer is shown).
 */
export const useQuestionStore = defineStore('questions', () => {
  const questions = ref<Question[]>([])
  const currentTopic = ref<number>(1)
  // 0-based index into `currentTopicQuestions`.
  const currentQuestionIndex = ref<number>(0)
  const showAnswer = ref<boolean>(false)
  const loading = ref<boolean>(true)

  // Distinct topic numbers in ascending order.
  const topics = computed(() => {
    const topicSet = new Set(questions.value.map(q => q.topic))
    return Array.from(topicSet).sort((a, b) => a - b)
  })

  // Questions of the selected topic, in file order.
  const currentTopicQuestions = computed(() => {
    return questions.value.filter(q => q.topic === currentTopic.value)
  })

  // The question at the current index, or null when there is none.
  const currentQuestion = computed(() => {
    return currentTopicQuestions.value[currentQuestionIndex.value] || null
  })

  // Number of questions per topic, keyed by topic number.
  const topicStats = computed(() => {
    const stats: Record<number, number> = {}
    questions.value.forEach(q => {
      stats[q.topic] = (stats[q.topic] || 0) + 1
    })
    return stats
  })

  /**
   * Load the question list, preferring the translated file and falling back
   * to the untranslated one.
   *
   * A source is rejected, and the next one tried, when the request fails, the
   * status is not OK, the body is not valid JSON, or the JSON is not an
   * array. The last case covers a server that answers a missing file with a
   * 200 response that is not the question list. When no source works, the
   * last error is logged and the current questions are left untouched.
   */
  async function loadQuestions() {
    const sources = ['/questions_translated.json', '/questions.json']
    loading.value = true
    try {
      let lastError: unknown = null
      for (const url of sources) {
        try {
          const response = await fetch(url)
          if (!response.ok) {
            lastError = new Error(`${url} responded with HTTP ${response.status}`)
            continue
          }
          const data: unknown = await response.json()
          if (!Array.isArray(data)) {
            lastError = new Error(`${url} did not contain a question array`)
            continue
          }
          questions.value = data as Question[]
          // Keep the selection valid: if the selected topic does not occur in
          // the loaded data, select the first topic that does.
          const firstTopic = topics.value[0]
          if (firstTopic !== undefined && !topics.value.includes(currentTopic.value)) {
            setTopic(firstTopic)
          }
          return
        } catch (error) {
          lastError = error
        }
      }
      console.error('Failed to load questions:', lastError)
    } finally {
      loading.value = false
    }
  }

  // Select a topic and start at its first question with the answer hidden.
  function setTopic(topic: number) {
    currentTopic.value = topic
    currentQuestionIndex.value = 0
    showAnswer.value = false
  }

  // Move forward one question; does nothing on the last question.
  function nextQuestion() {
    if (currentQuestionIndex.value < currentTopicQuestions.value.length - 1) {
      currentQuestionIndex.value++
      showAnswer.value = false
    }
  }

  // Move back one question; does nothing on the first question.
  function prevQuestion() {
    if (currentQuestionIndex.value > 0) {
      currentQuestionIndex.value--
      showAnswer.value = false
    }
  }

  // Go to a 0-based index within the current topic; out-of-range values are ignored.
  function jumpToQuestion(index: number) {
    if (index >= 0 && index < currentTopicQuestions.value.length) {
      currentQuestionIndex.value = index
      showAnswer.value = false
    }
  }

  function toggleAnswer() {
    showAnswer.value = !showAnswer.value
  }

  return {
    questions,
    currentTopic,
    currentQuestionIndex,
    showAnswer,
    loading,
    topics,
    currentTopicQuestions,
    currentQuestion,
    topicStats,
    loadQuestions,
    setTopic,
    nextQuestion,
    prevQuestion,
    jumpToQuestion,
    toggleAnswer
  }
})

34
exam-viewer/src/style.css Normal file
View File

@@ -0,0 +1,34 @@
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
html, body {
height: 100%;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Fira Sans', 'Droid Sans', 'Helvetica Neue', sans-serif;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
}
#app {
height: 100%;
}
::-webkit-scrollbar {
width: 6px;
height: 6px;
}
::-webkit-scrollbar-track {
background: #f1f1f1;
}
::-webkit-scrollbar-thumb {
background: #c1c1c1;
border-radius: 3px;
}
::-webkit-scrollbar-thumb:hover {
background: #a8a8a8;
}

View File

@@ -0,0 +1,16 @@
{
"extends": "@vue/tsconfig/tsconfig.dom.json",
"compilerOptions": {
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
"types": ["vite/client"],
/* Linting */
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"erasableSyntaxOnly": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedSideEffectImports": true
},
"include": ["src/**/*.ts", "src/**/*.tsx", "src/**/*.vue"]
}

View File

@@ -0,0 +1,7 @@
{
"files": [],
"references": [
{ "path": "./tsconfig.app.json" },
{ "path": "./tsconfig.node.json" }
]
}

View File

@@ -0,0 +1,26 @@
{
"compilerOptions": {
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
"target": "ES2023",
"lib": ["ES2023"],
"module": "ESNext",
"types": ["node"],
"skipLibCheck": true,
/* Bundler mode */
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"moduleDetection": "force",
"noEmit": true,
/* Linting */
"strict": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"erasableSyntaxOnly": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedSideEffectImports": true
},
"include": ["vite.config.ts"]
}

View File

@@ -0,0 +1,7 @@
import { defineConfig } from 'vite'
import vue from '@vitejs/plugin-vue'
// https://vite.dev/config/
export default defineConfig({
plugins: [vue()],
})

BIN
exam_data/pdfs/topic_01.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_02.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_03.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_04.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_05.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_06.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_07.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_08.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_09.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_10.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_11.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_12.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_13.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_14.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_15.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_16.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_17.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_18.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_19.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_20.pdf Normal file

Binary file not shown.

BIN
exam_data/pdfs/topic_21.pdf Normal file

Binary file not shown.

7085
exam_data/questions.json Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

165
extract_questions_final.py Normal file
View File

@@ -0,0 +1,165 @@
#!/usr/bin/env python3
"""
精确提取PDF题目内容 - 最终版
"""
import re
import json
import os
from pypdf import PdfReader, PdfWriter
def split_pdf_and_extract_questions(pdf_path, topics_info_path, output_dir):
    """Split the PDF into one file per topic and extract every question.

    Args:
        pdf_path: Path of the source PDF.
        topics_info_path: Path of a JSON file holding a list of objects with
            ``topic_num``, ``start_page`` and ``end_page`` keys. The page
            values are used directly as indexes into ``reader.pages`` and
            ``end_page`` is inclusive.
        output_dir: Directory that receives ``pdfs/topic_NN.pdf`` and
            ``questions.json``; it is created when missing.

    Returns:
        The list of question dicts collected from all topics.
    """
    with open(topics_info_path, 'r', encoding='utf-8') as f:
        topics = json.load(f)

    reader = PdfReader(pdf_path)
    total_pages = len(reader.pages)

    os.makedirs(output_dir, exist_ok=True)
    pdf_dir = os.path.join(output_dir, 'pdfs')
    os.makedirs(pdf_dir, exist_ok=True)

    all_questions = []
    for topic in topics:
        topic_num = topic['topic_num']
        start_page = topic['start_page']
        # Clamp once so the per-topic PDF and the text extraction cover the
        # same pages; previously only the PDF writer was protected against an
        # end_page beyond the last page.
        end_page = min(topic['end_page'], total_pages - 1)

        writer = PdfWriter()
        for page_num in range(start_page, end_page + 1):
            writer.add_page(reader.pages[page_num])

        pdf_output_path = os.path.join(pdf_dir, f'topic_{topic_num:02d}.pdf')
        with open(pdf_output_path, 'wb') as f:
            writer.write(f)
        print(f"已保存: {pdf_output_path}")

        print(f"正在提取 Topic {topic_num} 的题目内容...")
        topic_questions = extract_questions_precise(reader, start_page, end_page, topic_num)
        all_questions.extend(topic_questions)
        print(f" Topic {topic_num}: 提取了 {len(topic_questions)} 道题")

    questions_json_path = os.path.join(output_dir, 'questions.json')
    with open(questions_json_path, 'w', encoding='utf-8') as f:
        json.dump(all_questions, f, ensure_ascii=False, indent=2)

    print(f"\n所有题目已保存到: {questions_json_path}")
    print(f"总共提取了 {len(all_questions)} 道题")
    return all_questions
def extract_questions_precise(reader, start_page, end_page, topic_num):
    """Extract the questions found in a page range of the PDF.

    Args:
        reader: Object exposing ``pages``; each page must provide
            ``extract_text()``.
        start_page: Index of the first page to read.
        end_page: Index of the last page to read (inclusive). Values past the
            end of the document are clamped to the last page.
        topic_num: Topic number recorded on every returned question.

    Returns:
        A list of question dicts as produced by ``parse_question_content``.
    """
    # Clamp so a stale end_page cannot index past the document.
    last_page = min(end_page, len(reader.pages) - 1)

    page_texts = []
    for page_num in range(start_page, last_page + 1):
        text = reader.pages[page_num].extract_text()
        if text:
            page_texts.append(text + "\n")
    full_text = "".join(page_texts)

    # A question runs from its "Question #N" header up to the next question
    # header, the next "Topic N" marker, or the end of the text.
    question_pattern = re.compile(
        r'Question\s+#(\d+)\s*\n(.*?)(?=Question\s+#\d+|Topic\s+\d+|$)',
        re.DOTALL | re.IGNORECASE
    )

    questions = []
    for number, body in question_pattern.findall(full_text):
        question_data = parse_question_content(topic_num, int(number), body.strip())
        if question_data:
            questions.append(question_data)
    return questions
def parse_question_content(topic_num, q_num, content):
    """Parse one question's raw text into stem, options and answer.

    Parsing stops at the first line starting with "Comments". Lines matching
    ``X. text`` open a new option; later lines extend the current option (or
    the stem while no option has been seen). When no "Correct Answer:" marker
    supplied an answer, a trailing run of 1-4 letters from A-D at the end of
    the last option is taken as the answer and removed from that option.

    Args:
        topic_num: Topic number stored in the result.
        q_num: Question number stored in the result.
        content: Raw text of the question.

    Returns:
        A dict with ``topic``, ``question_num``, ``stem``, ``options`` (list
        of ``{'label', 'text'}``) and ``answer``, or ``None`` when neither a
        stem nor any option was found.
    """
    # Lines starting with these markers are discussion furniture, not content.
    noise_prefixes = ('Most Voted', 'upvoted', 'Selected Answer:', 'Community vote', 'Correct Answer')

    option_pattern = re.compile(r'^([A-Z])\.\s*(.*)', re.IGNORECASE)
    answer_inline_pattern = re.compile(r'\s+([A-Z]{1,4})\s*$')
    # Only the marker is case-insensitive. The answer itself must be upper-case
    # letters (optionally separated by commas/spaces) that are not the start of
    # a word; the old `[A-Z,\s]+` under IGNORECASE swallowed any trailing text
    # such as "Community vote distribution" into the answer.
    answer_pattern = re.compile(
        r'(?i:Correct Answer:)\s*([A-Z]+(?:[\s,]+[A-Z]+)*)(?![a-z])'
    )
    comments_pattern = re.compile(r'^Comments', re.IGNORECASE)

    stem_parts = []
    options = []
    correct_answer = ""
    current_option = None
    current_option_text = ""

    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        if comments_pattern.match(line):
            break

        answer_match = answer_pattern.search(line)
        if answer_match:
            correct_answer = answer_match.group(1).strip().upper()
            # Keep processing whatever else shares the line with the answer.
            line = answer_pattern.sub('', line).strip()
            if not line:
                continue

        option_match = option_pattern.match(line)
        if option_match:
            if current_option is not None and current_option_text:
                options.append({
                    'label': current_option,
                    'text': current_option_text.strip()
                })
            current_option = option_match.group(1).upper()
            current_option_text = option_match.group(2)
        elif current_option is not None:
            if not line.startswith(noise_prefixes):
                current_option_text += " " + line
        elif not line.startswith(noise_prefixes):
            stem_parts.append(line)

    if current_option is not None and current_option_text:
        inline_answer = answer_inline_pattern.search(current_option_text)
        if inline_answer and not correct_answer:
            possible_answer = inline_answer.group(1)
            if all(c in 'ABCD' for c in possible_answer):
                correct_answer = possible_answer
                current_option_text = answer_inline_pattern.sub('', current_option_text)
        options.append({
            'label': current_option,
            'text': current_option_text.strip()
        })

    question_stem = " ".join(stem_parts)
    if not question_stem and not options:
        return None

    return {
        'topic': topic_num,
        'question_num': q_num,
        'stem': question_stem,
        'options': options,
        'answer': correct_answer
    }
if __name__ == '__main__':
    # Local import: only the script entry point needs the CLI parser.
    import argparse

    # The defaults reproduce the previously hard-coded locations, so running
    # the script without arguments behaves exactly as before.
    parser = argparse.ArgumentParser(description='Split the exam PDF by topic and extract its questions.')
    parser.add_argument('--pdf', default='/Users/duguoyou/D365/MB-330_with_discussion.pdf', help='source PDF path')
    parser.add_argument('--topics', default='/Users/duguoyou/D365/topics_info.json', help='topics info JSON path')
    parser.add_argument('--out', default='/Users/duguoyou/D365/exam_data', help='output directory')
    args = parser.parse_args()

    pdf_path = args.pdf
    topics_info_path = args.topics
    output_dir = args.out
    questions = split_pdf_and_extract_questions(pdf_path, topics_info_path, output_dir)

155
extract_questions_v2.py Normal file
View File

@@ -0,0 +1,155 @@
#!/usr/bin/env python3
"""
精确提取PDF题目内容 - 改进版
"""
import re
import json
import os
from pypdf import PdfReader, PdfWriter
def split_pdf_and_extract_questions(pdf_path, topics_info_path, output_dir):
    """Split the PDF into one file per topic and extract every question.

    Args:
        pdf_path: Path of the source PDF.
        topics_info_path: Path of a JSON file holding a list of objects with
            ``topic_num``, ``start_page`` and ``end_page`` keys. The page
            values are used directly as indexes into ``reader.pages`` and
            ``end_page`` is inclusive.
        output_dir: Directory that receives ``pdfs/topic_NN.pdf`` and
            ``questions.json``; it is created when missing.

    Returns:
        The list of question dicts collected from all topics.
    """
    with open(topics_info_path, 'r', encoding='utf-8') as f:
        topics = json.load(f)

    reader = PdfReader(pdf_path)
    total_pages = len(reader.pages)

    os.makedirs(output_dir, exist_ok=True)
    pdf_dir = os.path.join(output_dir, 'pdfs')
    os.makedirs(pdf_dir, exist_ok=True)

    all_questions = []
    for topic in topics:
        topic_num = topic['topic_num']
        start_page = topic['start_page']
        # Clamp once so the per-topic PDF and the text extraction cover the
        # same pages; previously only the PDF writer was protected against an
        # end_page beyond the last page.
        end_page = min(topic['end_page'], total_pages - 1)

        writer = PdfWriter()
        for page_num in range(start_page, end_page + 1):
            writer.add_page(reader.pages[page_num])

        pdf_output_path = os.path.join(pdf_dir, f'topic_{topic_num:02d}.pdf')
        with open(pdf_output_path, 'wb') as f:
            writer.write(f)
        print(f"已保存: {pdf_output_path}")

        print(f"正在提取 Topic {topic_num} 的题目内容...")
        topic_questions = extract_questions_precise(reader, start_page, end_page, topic_num)
        all_questions.extend(topic_questions)
        print(f" Topic {topic_num}: 提取了 {len(topic_questions)} 道题")

    questions_json_path = os.path.join(output_dir, 'questions.json')
    with open(questions_json_path, 'w', encoding='utf-8') as f:
        json.dump(all_questions, f, ensure_ascii=False, indent=2)

    print(f"\n所有题目已保存到: {questions_json_path}")
    print(f"总共提取了 {len(all_questions)} 道题")
    return all_questions
def extract_questions_precise(reader, start_page, end_page, topic_num):
    """Extract the questions found in a page range of the PDF.

    Args:
        reader: Object exposing ``pages``; each page must provide
            ``extract_text()``.
        start_page: Index of the first page to read.
        end_page: Index of the last page to read (inclusive). Values past the
            end of the document are clamped to the last page.
        topic_num: Topic number recorded on every returned question.

    Returns:
        A list of question dicts as produced by ``parse_question_content``.
    """
    # Clamp so a stale end_page cannot index past the document.
    last_page = min(end_page, len(reader.pages) - 1)

    page_texts = []
    for page_num in range(start_page, last_page + 1):
        text = reader.pages[page_num].extract_text()
        if text:
            page_texts.append(text + "\n")
    full_text = "".join(page_texts)

    # A question runs from its "Question #N" header up to the next question
    # header, the next "Topic N" marker, or the end of the text.
    question_pattern = re.compile(
        r'Question\s+#(\d+)\s*\n(.*?)(?=Question\s+#\d+|Topic\s+\d+|$)',
        re.DOTALL | re.IGNORECASE
    )

    questions = []
    for number, body in question_pattern.findall(full_text):
        question_data = parse_question_content(topic_num, int(number), body.strip())
        if question_data:
            questions.append(question_data)
    return questions
def parse_question_content(topic_num, q_num, content):
    """Parse one question's raw text into stem, options and answer.

    Parsing stops at the first line starting with "Comments". A line holding
    a "Correct Answer:" marker only contributes the answer; the rest of that
    line is discarded. Lines matching ``X. text`` open a new option; later
    lines extend the current option (or the stem while no option was seen).

    Args:
        topic_num: Topic number stored in the result.
        q_num: Question number stored in the result.
        content: Raw text of the question.

    Returns:
        A dict with ``topic``, ``question_num``, ``stem``, ``options`` (list
        of ``{'label', 'text'}``) and ``answer``, or ``None`` when neither a
        stem nor any option was found.
    """
    # Lines starting with these markers are discussion furniture, not content.
    # The same tuple is now used for stem and option lines, so a bare
    # "Correct Answer:" line is no longer appended to an option's text.
    noise_prefixes = ('Most Voted', 'upvoted', 'Selected Answer:', 'Community vote', 'Correct Answer')

    option_pattern = re.compile(r'^([A-Z])\.\s*(.*)', re.IGNORECASE)
    # Only the marker is case-insensitive. The answer itself must be upper-case
    # letters (optionally separated by commas/spaces) that are not the start of
    # a word; the old `[A-Z,\s]+` under IGNORECASE swallowed any trailing text
    # such as "Community vote distribution" into the answer.
    answer_pattern = re.compile(
        r'(?i:Correct Answer:)\s*([A-Z]+(?:[\s,]+[A-Z]+)*)(?![a-z])'
    )
    comments_pattern = re.compile(r'^Comments', re.IGNORECASE)

    stem_parts = []
    options = []
    correct_answer = ""
    current_option = None
    current_option_text = ""

    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        if comments_pattern.match(line):
            break

        answer_match = answer_pattern.search(line)
        if answer_match:
            correct_answer = answer_match.group(1).strip().upper()
            continue

        option_match = option_pattern.match(line)
        if option_match:
            if current_option is not None and current_option_text:
                options.append({
                    'label': current_option,
                    'text': current_option_text.strip()
                })
            current_option = option_match.group(1).upper()
            current_option_text = option_match.group(2)
        elif current_option is not None:
            if not line.startswith(noise_prefixes):
                current_option_text += " " + line
        elif not line.startswith(noise_prefixes):
            stem_parts.append(line)

    if current_option is not None and current_option_text:
        options.append({
            'label': current_option,
            'text': current_option_text.strip()
        })

    question_stem = " ".join(stem_parts)
    if not question_stem and not options:
        return None

    return {
        'topic': topic_num,
        'question_num': q_num,
        'stem': question_stem,
        'options': options,
        'answer': correct_answer
    }
if __name__ == '__main__':
    # Local import: only the script entry point needs the CLI parser.
    import argparse

    # The defaults reproduce the previously hard-coded locations, so running
    # the script without arguments behaves exactly as before.
    parser = argparse.ArgumentParser(description='Split the exam PDF by topic and extract its questions.')
    parser.add_argument('--pdf', default='/Users/duguoyou/D365/MB-330_with_discussion.pdf', help='source PDF path')
    parser.add_argument('--topics', default='/Users/duguoyou/D365/topics_info.json', help='topics info JSON path')
    parser.add_argument('--out', default='/Users/duguoyou/D365/exam_data', help='output directory')
    args = parser.parse_args()

    pdf_path = args.pdf
    topics_info_path = args.topics
    output_dir = args.out
    questions = split_pdf_and_extract_questions(pdf_path, topics_info_path, output_dir)

157
extract_questions_v3.py Normal file
View File

@@ -0,0 +1,157 @@
#!/usr/bin/env python3
"""
精确提取PDF题目内容 - 最终版
"""
import re
import json
import os
from pypdf import PdfReader, PdfWriter
def split_pdf_and_extract_questions(pdf_path, topics_info_path, output_dir):
    """Split the PDF into one file per topic and extract every question.

    Args:
        pdf_path: Path of the source PDF.
        topics_info_path: Path of a JSON file holding a list of objects with
            ``topic_num``, ``start_page`` and ``end_page`` keys. The page
            values are used directly as indexes into ``reader.pages`` and
            ``end_page`` is inclusive.
        output_dir: Directory that receives ``pdfs/topic_NN.pdf`` and
            ``questions.json``; it is created when missing.

    Returns:
        The list of question dicts collected from all topics.
    """
    with open(topics_info_path, 'r', encoding='utf-8') as f:
        topics = json.load(f)

    reader = PdfReader(pdf_path)
    total_pages = len(reader.pages)

    os.makedirs(output_dir, exist_ok=True)
    pdf_dir = os.path.join(output_dir, 'pdfs')
    os.makedirs(pdf_dir, exist_ok=True)

    all_questions = []
    for topic in topics:
        topic_num = topic['topic_num']
        start_page = topic['start_page']
        # Clamp once so the per-topic PDF and the text extraction cover the
        # same pages; previously only the PDF writer was protected against an
        # end_page beyond the last page.
        end_page = min(topic['end_page'], total_pages - 1)

        writer = PdfWriter()
        for page_num in range(start_page, end_page + 1):
            writer.add_page(reader.pages[page_num])

        pdf_output_path = os.path.join(pdf_dir, f'topic_{topic_num:02d}.pdf')
        with open(pdf_output_path, 'wb') as f:
            writer.write(f)
        print(f"已保存: {pdf_output_path}")

        print(f"正在提取 Topic {topic_num} 的题目内容...")
        topic_questions = extract_questions_precise(reader, start_page, end_page, topic_num)
        all_questions.extend(topic_questions)
        print(f" Topic {topic_num}: 提取了 {len(topic_questions)} 道题")

    questions_json_path = os.path.join(output_dir, 'questions.json')
    with open(questions_json_path, 'w', encoding='utf-8') as f:
        json.dump(all_questions, f, ensure_ascii=False, indent=2)

    print(f"\n所有题目已保存到: {questions_json_path}")
    print(f"总共提取了 {len(all_questions)} 道题")
    return all_questions
def extract_questions_precise(reader, start_page, end_page, topic_num):
    """Extract the questions found in a page range of the PDF.

    Args:
        reader: Object exposing ``pages``; each page must provide
            ``extract_text()``.
        start_page: Index of the first page to read.
        end_page: Index of the last page to read (inclusive). Values past the
            end of the document are clamped to the last page.
        topic_num: Topic number recorded on every returned question.

    Returns:
        A list of question dicts as produced by ``parse_question_content``.
    """
    # Clamp so a stale end_page cannot index past the document.
    last_page = min(end_page, len(reader.pages) - 1)

    page_texts = []
    for page_num in range(start_page, last_page + 1):
        text = reader.pages[page_num].extract_text()
        if text:
            page_texts.append(text + "\n")
    full_text = "".join(page_texts)

    # A question runs from its "Question #N" header up to the next question
    # header, the next "Topic N" marker, or the end of the text.
    question_pattern = re.compile(
        r'Question\s+#(\d+)\s*\n(.*?)(?=Question\s+#\d+|Topic\s+\d+|$)',
        re.DOTALL | re.IGNORECASE
    )

    questions = []
    for number, body in question_pattern.findall(full_text):
        question_data = parse_question_content(topic_num, int(number), body.strip())
        if question_data:
            questions.append(question_data)
    return questions
def parse_question_content(topic_num, q_num, content):
    """Parse one question's raw text into stem, options and answer.

    Parsing stops at the first line starting with "Comments". A "Correct
    Answer:" marker is removed from its line and the remainder of the line is
    still processed. Lines matching ``X. text`` open a new option; later
    lines extend the current option (or the stem while no option was seen).

    Args:
        topic_num: Topic number stored in the result.
        q_num: Question number stored in the result.
        content: Raw text of the question.

    Returns:
        A dict with ``topic``, ``question_num``, ``stem``, ``options`` (list
        of ``{'label', 'text'}``) and ``answer``, or ``None`` when neither a
        stem nor any option was found.
    """
    # Lines starting with these markers are discussion furniture, not content.
    noise_prefixes = ('Most Voted', 'upvoted', 'Selected Answer:', 'Community vote', 'Correct Answer')

    option_pattern = re.compile(r'^([A-Z])\.\s*(.*)', re.IGNORECASE)
    # Only the marker is case-insensitive. The answer itself must be upper-case
    # letters (optionally separated by commas/spaces) that are not the start of
    # a word; the old `[A-Z,\s]+` under IGNORECASE swallowed any trailing text
    # such as "Community vote distribution" into the answer.
    answer_pattern = re.compile(
        r'(?i:Correct Answer:)\s*([A-Z]+(?:[\s,]+[A-Z]+)*)(?![a-z])'
    )
    comments_pattern = re.compile(r'^Comments', re.IGNORECASE)

    stem_parts = []
    options = []
    correct_answer = ""
    current_option = None
    current_option_text = ""

    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        if comments_pattern.match(line):
            break

        answer_match = answer_pattern.search(line)
        if answer_match:
            correct_answer = answer_match.group(1).strip().upper()
            # Keep processing whatever else shares the line with the answer.
            line = answer_pattern.sub('', line).strip()
            if not line:
                continue

        option_match = option_pattern.match(line)
        if option_match:
            if current_option is not None and current_option_text:
                options.append({
                    'label': current_option,
                    'text': current_option_text.strip()
                })
            current_option = option_match.group(1).upper()
            current_option_text = option_match.group(2)
        elif current_option is not None:
            if not line.startswith(noise_prefixes):
                current_option_text += " " + line
        elif not line.startswith(noise_prefixes):
            stem_parts.append(line)

    if current_option is not None and current_option_text:
        options.append({
            'label': current_option,
            'text': current_option_text.strip()
        })

    question_stem = " ".join(stem_parts)
    if not question_stem and not options:
        return None

    return {
        'topic': topic_num,
        'question_num': q_num,
        'stem': question_stem,
        'options': options,
        'answer': correct_answer
    }
if __name__ == '__main__':
    # Local import: only the script entry point needs the CLI parser.
    import argparse

    # The defaults reproduce the previously hard-coded locations, so running
    # the script without arguments behaves exactly as before.
    parser = argparse.ArgumentParser(description='Split the exam PDF by topic and extract its questions.')
    parser.add_argument('--pdf', default='/Users/duguoyou/D365/MB-330_with_discussion.pdf', help='source PDF path')
    parser.add_argument('--topics', default='/Users/duguoyou/D365/topics_info.json', help='topics info JSON path')
    parser.add_argument('--out', default='/Users/duguoyou/D365/exam_data', help='output directory')
    args = parser.parse_args()

    pdf_path = args.pdf
    topics_info_path = args.topics
    output_dir = args.out
    questions = split_pdf_and_extract_questions(pdf_path, topics_info_path, output_dir)

108
split_pdf.py Normal file
View File

@@ -0,0 +1,108 @@
#!/usr/bin/env python3
"""
切割PDF文件并提取题目内容
"""
import re
import json
import os
from pypdf import PdfReader, PdfWriter
def split_pdf_and_extract_questions(pdf_path, topics_info_path, output_dir):
    """Split the PDF into one file per topic and extract every question.

    Args:
        pdf_path: Path of the source PDF.
        topics_info_path: Path of a JSON file holding a list of objects with
            ``topic_num``, ``start_page`` and ``end_page`` keys. The page
            values are used directly as indexes into ``reader.pages`` and
            ``end_page`` is inclusive.
        output_dir: Directory that receives ``pdfs/topic_NN.pdf`` and
            ``questions.json``; it is created when missing.

    Returns:
        The list of question dicts collected from all topics.
    """
    with open(topics_info_path, 'r', encoding='utf-8') as f:
        topics = json.load(f)

    reader = PdfReader(pdf_path)
    total_pages = len(reader.pages)

    os.makedirs(output_dir, exist_ok=True)
    pdf_dir = os.path.join(output_dir, 'pdfs')
    os.makedirs(pdf_dir, exist_ok=True)

    all_questions = []
    for topic in topics:
        topic_num = topic['topic_num']
        start_page = topic['start_page']
        # Clamp once so the per-topic PDF and the text extraction cover the
        # same pages; previously only the PDF writer was protected against an
        # end_page beyond the last page.
        end_page = min(topic['end_page'], total_pages - 1)

        writer = PdfWriter()
        for page_num in range(start_page, end_page + 1):
            writer.add_page(reader.pages[page_num])

        pdf_output_path = os.path.join(pdf_dir, f'topic_{topic_num:02d}.pdf')
        with open(pdf_output_path, 'wb') as f:
            writer.write(f)
        print(f"已保存: {pdf_output_path}")

        print(f"正在提取 Topic {topic_num} 的题目内容...")
        topic_questions = extract_questions_from_pages(reader, start_page, end_page, topic_num)
        all_questions.extend(topic_questions)
        print(f" Topic {topic_num}: 提取了 {len(topic_questions)} 道题")

    questions_json_path = os.path.join(output_dir, 'questions.json')
    with open(questions_json_path, 'w', encoding='utf-8') as f:
        json.dump(all_questions, f, ensure_ascii=False, indent=2)

    print(f"\n所有题目已保存到: {questions_json_path}")
    print(f"总共提取了 {len(all_questions)} 道题")
    return all_questions
def extract_questions_from_pages(reader, start_page, end_page, topic_num):
    """Collect questions line by line from a page range of the PDF.

    A line containing "Question #N" starts a new question. Within a question,
    lines starting with "A."-"D." are stored as options, a "Correct Answer:"
    line sets the answer, and everything after a "Comments" line is appended
    to the explanation. Other lines before "Comments" are ignored.

    Args:
        reader: Object exposing ``pages``; each page must provide
            ``extract_text()``.
        start_page: Index of the first page to read.
        end_page: Index of the last page to read (inclusive). Values past the
            end of the document are clamped to the last page.
        topic_num: Topic number recorded on every returned question.

    Returns:
        A list of dicts with ``topic``, ``question_num``, ``content``,
        ``options``, ``answer`` and ``explanation`` keys.
    """
    questions = []
    current_question = None
    question_pattern = re.compile(r'Question\s+#(\d+)', re.IGNORECASE)
    option_prefixes = ('A.', 'B.', 'C.', 'D.')

    # Clamp so a stale end_page cannot index past the document.
    last_page = min(end_page, len(reader.pages) - 1)

    for page_num in range(start_page, last_page + 1):
        text = reader.pages[page_num].extract_text()
        if not text:
            continue

        for raw_line in text.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            q_match = question_pattern.search(line)
            if q_match:
                if current_question:
                    questions.append(current_question)
                current_question = {
                    'topic': topic_num,
                    'question_num': int(q_match.group(1)),
                    'content': line,
                    'options': [],
                    'answer': None,
                    'explanation': None
                }
            elif current_question:
                if line.startswith(option_prefixes):
                    current_question['options'].append(line)
                elif line.startswith('Correct Answer:'):
                    current_question['answer'] = line.replace('Correct Answer:', '').strip()
                elif line.startswith('Comments'):
                    # An empty string (rather than None) marks that the
                    # explanation section has started.
                    current_question['explanation'] = ''
                elif current_question.get('explanation') is not None:
                    current_question['explanation'] += ' ' + line

    if current_question:
        questions.append(current_question)
    return questions
if __name__ == '__main__':
    # Local import: only the script entry point needs the CLI parser.
    import argparse

    # The defaults reproduce the previously hard-coded locations, so running
    # the script without arguments behaves exactly as before.
    parser = argparse.ArgumentParser(description='Split the exam PDF by topic and extract its questions.')
    parser.add_argument('--pdf', default='/Users/duguoyou/D365/MB-330_with_discussion.pdf', help='source PDF path')
    parser.add_argument('--topics', default='/Users/duguoyou/D365/topics_info.json', help='topics info JSON path')
    parser.add_argument('--out', default='/Users/duguoyou/D365/exam_data', help='output directory')
    args = parser.parse_args()

    pdf_path = args.pdf
    topics_info_path = args.topics
    output_dir = args.out
    questions = split_pdf_and_extract_questions(pdf_path, topics_info_path, output_dir)

610
topics_info.json Normal file
View File

@@ -0,0 +1,610 @@
[
{
"topic_num": 1,
"start_page": 0,
"end_page": 71,
"question_count": 36,
"questions": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36
]
},
{
"topic_num": 2,
"start_page": 73,
"end_page": 233,
"question_count": 64,
"questions": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64
]
},
{
"topic_num": 3,
"start_page": 235,
"end_page": 456,
"question_count": 99,
"questions": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71,
72,
73,
74,
75,
76,
77,
78,
79,
80,
81,
82,
83,
84,
85,
86,
87,
88,
89,
90,
91,
92,
93,
94,
95,
96,
97,
98,
99
]
},
{
"topic_num": 4,
"start_page": 458,
"end_page": 637,
"question_count": 77,
"questions": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71,
72,
73,
74,
75,
76,
77
]
},
{
"topic_num": 5,
"start_page": 639,
"end_page": 779,
"question_count": 63,
"questions": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63
]
},
{
"topic_num": 6,
"start_page": 781,
"end_page": 911,
"question_count": 53,
"questions": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53
]
},
{
"topic_num": 7,
"start_page": 913,
"end_page": 920,
"question_count": 4,
"questions": [
1,
2,
3,
4
]
},
{
"topic_num": 8,
"start_page": 921,
"end_page": 921,
"question_count": 1,
"questions": [
1
]
},
{
"topic_num": 9,
"start_page": 925,
"end_page": 925,
"question_count": 1,
"questions": [
1
]
},
{
"topic_num": 10,
"start_page": 929,
"end_page": 941,
"question_count": 6,
"questions": [
1,
2,
3,
4,
5,
6
]
},
{
"topic_num": 11,
"start_page": 942,
"end_page": 942,
"question_count": 1,
"questions": [
1
]
},
{
"topic_num": 12,
"start_page": 946,
"end_page": 950,
"question_count": 2,
"questions": [
1,
2
]
},
{
"topic_num": 13,
"start_page": 952,
"end_page": 966,
"question_count": 7,
"questions": [
1,
2,
3,
4,
5,
6,
7
]
},
{
"topic_num": 14,
"start_page": 968,
"end_page": 975,
"question_count": 3,
"questions": [
1,
2,
3
]
},
{
"topic_num": 15,
"start_page": 977,
"end_page": 984,
"question_count": 3,
"questions": [
1,
2,
3
]
},
{
"topic_num": 16,
"start_page": 986,
"end_page": 996,
"question_count": 4,
"questions": [
1,
2,
3,
4
]
},
{
"topic_num": 17,
"start_page": 999,
"end_page": 999,
"question_count": 1,
"questions": [
1
]
},
{
"topic_num": 18,
"start_page": 1002,
"end_page": 1013,
"question_count": 5,
"questions": [
1,
2,
3,
4,
5
]
},
{
"topic_num": 19,
"start_page": 1015,
"end_page": 1024,
"question_count": 4,
"questions": [
1,
2,
3,
4
]
},
{
"topic_num": 20,
"start_page": 1026,
"end_page": 1030,
"question_count": 2,
"questions": [
1,
2
]
},
{
"topic_num": 21,
"start_page": 1032,
"end_page": 1040,
"question_count": 4,
"questions": [
1,
2,
3,
4
]
}
]

130
translate_aliyun.py Normal file
View File

@@ -0,0 +1,130 @@
#!/usr/bin/env python3
"""
使用阿里百炼API翻译题目内容
"""
import json
import os
import time
import urllib.request
import urllib.parse
import ssl
# SECURITY: the literal below is a credential committed to version control.
# It should be rotated and removed; it is kept only as a fallback so existing
# runs keep working. Supply the key through the DASHSCOPE_API_KEY environment
# variable instead.
API_KEY = os.environ.get("DASHSCOPE_API_KEY", "sk-74905419d30541d18991396892bb27b0")
# Text-generation endpoint that translate_text posts to.
API_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"
def translate_text(text):
    """Translate English text to Chinese through the configured API.

    This is best effort: on any request or response error the problem is
    printed and the original ``text`` is returned unchanged.

    Args:
        text: Text to translate. Empty, whitespace-only or ``None`` input is
            returned as is without calling the API.

    Returns:
        The translated text, or ``text`` itself when translation failed.
    """
    if not text or not text.strip():
        return text

    prompt = f"""请将以下英文翻译成中文,保持专业术语的准确性,直接输出翻译结果,不要添加任何解释:
英文原文:
{text}
中文翻译:"""

    # The model sometimes echoes the final prompt label before its answer.
    echoed_label = '中文翻译:'

    try:
        headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {API_KEY}'
        }
        data = {
            "model": "qwen-turbo",
            "input": {
                "messages": [
                    {
                        "role": "user",
                        "content": prompt
                    }
                ]
            },
            "parameters": {
                "temperature": 0.1,
                "max_tokens": 2000
            }
        }
        req = urllib.request.Request(
            API_URL,
            data=json.dumps(data).encode('utf-8'),
            headers=headers,
            method='POST'
        )
        ssl_context = ssl.create_default_context()
        with urllib.request.urlopen(req, context=ssl_context, timeout=60) as response:
            result = json.loads(response.read().decode('utf-8'))

        if result and 'output' in result and 'text' in result['output']:
            translated = result['output']['text'].strip()
            if translated.startswith(echoed_label):
                # Strip exactly the label; the former fixed slice of 6 removed
                # one character too many and ate the start of the translation.
                translated = translated[len(echoed_label):].strip()
            return translated

        print(f"API响应格式错误: {result}")
        return text
    except Exception as e:
        # Deliberate best effort: report and fall back to the source text.
        print(f"翻译错误: {e}")
        return text
def translate_questions(input_file, output_file, start_index=0, batch_size=10):
    """Translate one batch of questions and save the whole list.

    When ``output_file`` already exists it is loaded instead of
    ``input_file`` so earlier translations are kept. Only entries whose
    ``stem_cn`` / ``text_cn`` is missing, empty or the placeholder
    ``'待翻译...'`` are translated.

    Args:
        input_file: JSON file with the untranslated question list.
        output_file: JSON file that receives the (partially) translated list.
        start_index: Index of the first question in the batch.
        batch_size: Maximum number of questions to process.

    Returns:
        The full question list, including untouched entries.
    """
    source_file = output_file if os.path.exists(output_file) else input_file
    with open(source_file, 'r', encoding='utf-8') as f:
        questions = json.load(f)

    total = len(questions)
    end_index = min(start_index + batch_size, total)
    # Separator added: the two numbers used to be printed fused together.
    print(f"正在翻译第 {start_index + 1}-{end_index} 题,共 {total}")

    def is_pending(value):
        # Missing, empty and placeholder values all still need translating.
        return not value or value == '待翻译...'

    try:
        for i in range(start_index, end_index):
            q = questions[i]
            print(f"翻译 Topic {q['topic']} - Question {q['question_num']} ({i+1}/{total})")

            if is_pending(q.get('stem_cn')):
                print(f" 翻译题干...")
                q['stem_cn'] = translate_text(q['stem'])
                time.sleep(0.5)  # throttle API calls

            for opt in q['options']:
                if is_pending(opt.get('text_cn')):
                    print(f" 翻译选项 {opt['label']}...")
                    opt['text_cn'] = translate_text(opt['text'])
                    time.sleep(0.3)  # throttle API calls
    finally:
        # Save even when the batch is interrupted (e.g. Ctrl+C), so the
        # translations already obtained are not thrown away.
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(questions, f, ensure_ascii=False, indent=2)

    print(f"\n翻译进度: {end_index}/{total}")
    if end_index < total:
        print(f"继续翻译请运行: python3 translate_aliyun.py --start {end_index}")
    else:
        print("所有题目翻译完成!")
    return questions
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--start', type=int, default=0, help='起始索引')
    parser.add_argument('--batch', type=int, default=10, help='批量大小')
    # The file locations used to be hard-coded; the defaults keep the old
    # behavior while allowing the script to run on other machines.
    parser.add_argument('--input', default='/Users/duguoyou/D365/exam_data/questions.json', help='input questions JSON')
    parser.add_argument('--output', default='/Users/duguoyou/D365/exam_data/questions_translated.json', help='output questions JSON')
    args = parser.parse_args()

    input_file = args.input
    output_file = args.output
    translate_questions(input_file, output_file, args.start, args.batch)

116
translate_all.py Normal file
View File

@@ -0,0 +1,116 @@
#!/usr/bin/env python3
"""
自动翻译所有题目
"""
import json
import os
import time
import urllib.request
import ssl
# SECURITY: the literal below is a credential committed to version control.
# It should be rotated and removed; it is kept only as a fallback so existing
# runs keep working. Supply the key through the DASHSCOPE_API_KEY environment
# variable instead.
API_KEY = os.environ.get("DASHSCOPE_API_KEY", "sk-74905419d30541d18991396892bb27b0")
# Text-generation endpoint that translate_text posts to.
API_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"
def translate_text(text):
    """Translate English text to Chinese through the configured API.

    This is best effort: on any request or response error the problem is
    printed and the original ``text`` is returned unchanged.

    Args:
        text: Text to translate. Empty, whitespace-only or ``None`` input is
            returned as is without calling the API.

    Returns:
        The translated text, or ``text`` itself when translation failed.
    """
    if not text or not text.strip():
        return text

    prompt = f"""请将以下英文翻译成中文,保持专业术语的准确性,直接输出翻译结果,不要添加任何解释:
英文原文:
{text}
中文翻译:"""

    # The model sometimes echoes the final prompt label before its answer.
    echoed_label = '中文翻译:'

    try:
        headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {API_KEY}'
        }
        data = {
            "model": "qwen-turbo",
            "input": {
                "messages": [
                    {
                        "role": "user",
                        "content": prompt
                    }
                ]
            },
            "parameters": {
                "temperature": 0.1,
                "max_tokens": 2000
            }
        }
        req = urllib.request.Request(
            API_URL,
            data=json.dumps(data).encode('utf-8'),
            headers=headers,
            method='POST'
        )
        ssl_context = ssl.create_default_context()
        with urllib.request.urlopen(req, context=ssl_context, timeout=60) as response:
            result = json.loads(response.read().decode('utf-8'))

        if result and 'output' in result and 'text' in result['output']:
            translated = result['output']['text'].strip()
            if translated.startswith(echoed_label):
                # Strip exactly the label; the former fixed slice of 6 removed
                # one character too many and ate the start of the translation.
                translated = translated[len(echoed_label):].strip()
            return translated

        print(f"API响应格式错误: {result}")
        return text
    except Exception as e:
        # Deliberate best effort: report and fall back to the source text.
        print(f"翻译错误: {e}")
        return text
def translate_all_questions(input_file, output_file):
    """Translate every question, checkpointing after each changed question.

    When ``output_file`` already exists it is loaded instead of
    ``input_file`` so a previous run is resumed. Only entries whose
    ``stem_cn`` / ``text_cn`` is missing, empty or the placeholder
    ``'待翻译...'`` are translated.

    Args:
        input_file: JSON file with the untranslated question list.
        output_file: JSON file that receives the translated list.
    """
    source_file = output_file if os.path.exists(output_file) else input_file
    with open(source_file, 'r', encoding='utf-8') as f:
        questions = json.load(f)

    def is_pending(value):
        # Missing, empty and placeholder values all still need translating.
        return not value or value == '待翻译...'

    def save_checkpoint():
        # Write to a sibling file and swap it in: an interruption while
        # dumping can no longer leave a truncated output_file that would make
        # the next resume fail in json.load.
        tmp_file = output_file + '.tmp'
        with open(tmp_file, 'w', encoding='utf-8') as f:
            json.dump(questions, f, ensure_ascii=False, indent=2)
        os.replace(tmp_file, output_file)

    total = len(questions)
    for i, q in enumerate(questions):
        print(f"翻译 Topic {q['topic']} - Question {q['question_num']} ({i+1}/{total})")
        need_save = False

        if is_pending(q.get('stem_cn')):
            print(f" 翻译题干...")
            q['stem_cn'] = translate_text(q['stem'])
            need_save = True
            time.sleep(0.3)  # throttle API calls

        for opt in q['options']:
            if is_pending(opt.get('text_cn')):
                print(f" 翻译选项 {opt['label']}...")
                opt['text_cn'] = translate_text(opt['text'])
                need_save = True
                time.sleep(0.2)  # throttle API calls

        if need_save:
            save_checkpoint()

    print(f"\n所有 {total} 道题目翻译完成!")
if __name__ == '__main__':
    # Local import: only the script entry point needs the CLI parser.
    import argparse

    # The file locations used to be hard-coded; the defaults keep the old
    # behavior while allowing the script to run on other machines.
    parser = argparse.ArgumentParser(description='Translate every extracted question.')
    parser.add_argument('--input', default='/Users/duguoyou/D365/exam_data/questions.json', help='input questions JSON')
    parser.add_argument('--output', default='/Users/duguoyou/D365/exam_data/questions_translated.json', help='output questions JSON')
    args = parser.parse_args()

    input_file = args.input
    output_file = args.output
    translate_all_questions(input_file, output_file)

99
translate_api.py Normal file
View File

@@ -0,0 +1,99 @@
#!/usr/bin/env python3
"""
翻译题目内容 - 使用翻译API
"""
import json
import os
import time
import urllib.request
import urllib.parse
import ssl
# Keep the library defaults (certificate and host name verification on).
# Verification was previously switched off, which let any intermediary read
# or alter the translation traffic.
ssl_context = ssl.create_default_context()
def translate_text(text, target_lang='zh-CN'):
    """Translate English text via the translate.googleapis.com endpoint.

    Empty, whitespace-only or ``None`` input is returned untouched. On any
    error the problem is printed and ``text`` is returned; ``text`` is also
    returned when the response carries no translated segments.
    """
    if not (text and text.strip()):
        return text

    try:
        query = urllib.parse.urlencode({
            'client': 'gtx',
            'sl': 'en',
            'tl': target_lang,
            'dt': 't',
            'q': text
        })
        request = urllib.request.Request(
            "https://translate.googleapis.com/translate_a/single" + '?' + query
        )
        request.add_header('User-Agent', 'Mozilla/5.0')

        with urllib.request.urlopen(request, context=ssl_context, timeout=30) as response:
            payload = json.loads(response.read().decode('utf-8'))
            if payload and payload[0]:
                # The first element holds the segments; each segment's first
                # field is joined into the result, empty ones are skipped.
                pieces = []
                for segment in payload[0]:
                    if segment[0]:
                        pieces.append(segment[0])
                return ''.join(pieces)
    except Exception as e:
        print(f"翻译错误: {e}")
        return text

    return text
def translate_questions(input_file, output_file, start_index=0, batch_size=50):
    """Translate one batch of questions and save the whole list.

    Only entries whose ``stem_cn`` / ``text_cn`` is missing or empty are
    translated.

    Args:
        input_file: JSON file holding the question list to work on.
        output_file: JSON file that receives the (partially) translated list.
        start_index: Index of the first question in the batch.
        batch_size: Maximum number of questions to process.

    Returns:
        The full question list, including untouched entries.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        questions = json.load(f)

    total = len(questions)
    end_index = min(start_index + batch_size, total)
    # Separator added: the two numbers used to be printed fused together.
    print(f"正在翻译第 {start_index + 1}-{end_index} 题,共 {total}")

    try:
        for i in range(start_index, end_index):
            q = questions[i]
            print(f"翻译 Topic {q['topic']} - Question {q['question_num']} ({i+1}/{total})")

            if not q.get('stem_cn'):
                q['stem_cn'] = translate_text(q['stem'])
                time.sleep(0.5)  # throttle API calls

            for opt in q['options']:
                if not opt.get('text_cn'):
                    opt['text_cn'] = translate_text(opt['text'])
                    time.sleep(0.3)  # throttle API calls
    finally:
        # Save even when the batch is interrupted (e.g. Ctrl+C), so the
        # translations already obtained are not thrown away.
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(questions, f, ensure_ascii=False, indent=2)

    print(f"翻译进度: {end_index}/{total}")
    if end_index < total:
        print(f"继续翻译请运行: python3 translate_api.py --start {end_index}")
    return questions
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--start', type=int, default=0, help='起始索引')
    parser.add_argument('--batch', type=int, default=50, help='批量大小')
    # The file locations used to be hard-coded; the defaults keep the old
    # behavior while allowing the script to run on other machines.
    parser.add_argument('--input', default='/Users/duguoyou/D365/exam_data/questions.json', help='input questions JSON')
    parser.add_argument('--output', default='/Users/duguoyou/D365/exam_data/questions_translated.json', help='output questions JSON')
    args = parser.parse_args()

    input_file = args.input
    output_file = args.output

    # Resume from the previous output when one exists.
    if os.path.exists(output_file):
        input_file = output_file

    translate_questions(input_file, output_file, args.start, args.batch)

52
translate_questions.py Normal file
View File

@@ -0,0 +1,52 @@
#!/usr/bin/env python3
"""
翻译题目内容
"""
import json
import os
import re
def translate_text(text: str) -> str:
    """
    Placeholder translation hook: returns ``text`` unchanged.

    No translation is performed here; this is only a framework slot where a
    real translation backend (e.g. a translation API) is meant to be plugged in.
    """
    return text
def translate_questions(input_file, output_file):
    """Build the bilingual question file.

    Reads the question list from ``input_file``, copies each question into a
    record with ``*_en`` / ``*_cn`` fields (the ``*_cn`` values come from
    ``translate_text``) and writes the resulting list to ``output_file``.
    """
    with open(input_file, 'r', encoding='utf-8') as src:
        source_items = json.load(src)

    translated_questions = []
    for item in source_items:
        record = {
            'topic': item['topic'],
            'question_num': item['question_num'],
            'stem_en': item['stem'],
            'stem_cn': translate_text(item['stem']),
            'options': [],
            'answer': item['answer']
        }
        # Options are filled in after the record exists, keeping the same
        # evaluation order as before.
        record['options'].extend(
            {
                'label': choice['label'],
                'text_en': choice['text'],
                'text_cn': translate_text(choice['text'])
            }
            for choice in item['options']
        )
        translated_questions.append(record)

    with open(output_file, 'w', encoding='utf-8') as dst:
        json.dump(translated_questions, dst, ensure_ascii=False, indent=2)

    print(f"翻译完成,保存到: {output_file}")
if __name__ == '__main__':
    # Local import: only the script entry point needs the CLI parser.
    import argparse

    # The file locations used to be hard-coded; the defaults keep the old
    # behavior while allowing the script to run on other machines.
    parser = argparse.ArgumentParser(description='Build the bilingual question file.')
    parser.add_argument('--input', default='/Users/duguoyou/D365/exam_data/questions.json', help='input questions JSON')
    parser.add_argument('--output', default='/Users/duguoyou/D365/exam_data/questions_translated.json', help='output questions JSON')
    args = parser.parse_args()

    input_file = args.input
    output_file = args.output
    translate_questions(input_file, output_file)

28
view_pdf.py Normal file
View File

@@ -0,0 +1,28 @@
#!/usr/bin/env python3
"""
分析PDF文件结构查看前几页内容
"""
from PyPDF2 import PdfReader
def view_pdf_content(pdf_path, start_page=0, end_page=5):
    """Print the extracted text of a range of PDF pages.

    Pages ``start_page`` up to, but not including, ``end_page`` are shown
    (capped at the document's page count); at most the first 2000 characters
    of each page are printed.
    """
    document = PdfReader(pdf_path)
    total_pages = len(document.pages)
    print(f"PDF总页数: {total_pages}")

    stop = min(end_page, total_pages)
    for page_num in range(start_page, stop):
        text = document.pages[page_num].extract_text()

        # Banner announcing the page about to be shown.
        print(f"\n{'='*60}")
        print(f"{page_num + 1} 页:")
        print('='*60)

        if not text:
            print("(无文本内容)")
            continue
        print(text[:2000])
if __name__ == '__main__':
    # Local import: only the script entry point needs the CLI parser.
    import argparse

    # The PDF location and page range used to be hard-coded; the defaults
    # keep the old behavior while allowing other files and ranges.
    parser = argparse.ArgumentParser(description='Print the text of the first pages of a PDF.')
    parser.add_argument('--pdf', default='/Users/duguoyou/D365/MB-330_with_discussion.pdf', help='PDF path')
    parser.add_argument('--start', type=int, default=0, help='first page index')
    parser.add_argument('--end', type=int, default=3, help='page index to stop before')
    args = parser.parse_args()

    pdf_path = args.pdf
    view_pdf_content(pdf_path, args.start, args.end)