Files
d365scm/view_pdf.py
2026-03-21 09:12:47 +08:00

29 lines
796 B
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Analyze the structure of a PDF file by viewing the text content of its first few pages.
"""
from PyPDF2 import PdfReader
def view_pdf_content(pdf_path, start_page=0, end_page=5, max_chars=2000):
    """
    Print the extracted text of a range of pages from a PDF file.

    The total page count is printed first, followed by one banner-delimited
    section per page in the requested range.

    Args:
        pdf_path: Path of the PDF file; passed straight to ``PdfReader``.
        start_page: Zero-based index of the first page to show. Negative
            values are treated as 0.
        end_page: Zero-based index one past the last page to show; capped at
            the document's page count.
        max_chars: Maximum number of characters printed per page (expected
            to be non-negative). ``None`` prints the whole page text.

    Returns:
        None. All output is written to stdout.
    """
    reader = PdfReader(pdf_path)
    total_pages = len(reader.pages)
    print(f"PDF总页数: {total_pages}")

    # A negative start would be used as a Python-style negative index into
    # reader.pages (counting from the end) and yield page labels of 0 or
    # below, so clamp it to the first page.
    first_page = max(start_page, 0)
    last_page = min(end_page, total_pages)
    separator = '=' * 60

    for page_num in range(first_page, last_page):
        text = reader.pages[page_num].extract_text()
        print(f"\n{separator}")
        print(f"{page_num + 1} 页:")  # page labels are 1-based
        print(separator)
        # Pages with no extractable text get a placeholder instead.
        print(text[:max_chars] if text else "(无文本内容)")
if __name__ == '__main__':
    import sys

    # Default sample document; an optional first command-line argument
    # overrides it so the script also works on other machines and files.
    pdf_path = '/Users/duguoyou/D365/MB-330_with_discussion.pdf'
    if len(sys.argv) > 1:
        pdf_path = sys.argv[1]
    # Show only the first three pages.
    view_pdf_content(pdf_path, 0, 3)