#!/usr/bin/env python3
"""Inspect a PDF file's structure by printing the text of its first few pages.

Usage:
    script.py [PDF_PATH]

When PDF_PATH is omitted, ``DEFAULT_PDF_PATH`` is used.
"""

import sys

from PyPDF2 import PdfReader

# Fallback document used when no path is given on the command line.
DEFAULT_PDF_PATH = '/Users/duguoyou/D365/MB-330_with_discussion.pdf'


def view_pdf_content(pdf_path, start_page=0, end_page=5, *, max_chars=2000):
    """Print the extracted text of a range of pages from a PDF.

    The total page count is printed first, followed by one banner and
    text preview per page in the requested range.

    Args:
        pdf_path: Location of the PDF; handed to ``PdfReader`` unchanged.
        start_page: Zero-based index of the first page to show. Negative
            values are treated as 0 so they cannot wrap around and index
            pages from the end of the document.
        end_page: Zero-based index one past the last page to show. Values
            beyond the end of the document are clamped to the page count.
        max_chars: Maximum number of characters of extracted text printed
            for each page.

    Returns:
        None. All output goes to standard output.
    """
    reader = PdfReader(pdf_path)
    total_pages = len(reader.pages)

    print(f"PDF总页数: {total_pages}")

    # Clamp both ends: a negative start would otherwise be used as a
    # from-the-end index and be reported under a wrong page number.
    first = max(start_page, 0)
    last = min(end_page, total_pages)

    separator = '=' * 60
    for page_num in range(first, last):
        text = reader.pages[page_num].extract_text()

        print(f"\n{separator}")
        print(f"第 {page_num + 1} 页:")  # shown one-based for the reader
        print(separator)

        if text:
            print(text[:max_chars])
        else:
            # extract_text() produced nothing printable for this page.
            print("(无文本内容)")


if __name__ == '__main__':
    # An optional first argument overrides the built-in default path.
    pdf_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_PDF_PATH
    view_pdf_content(pdf_path, 0, 3)